In [1]:
import os
import pandas as pd

BASE_PATH = "."  # current directory

In [2]:
def get_drive_folders(base_path):
    return [
        f for f in os.listdir(base_path)
        if f.startswith("D3-") and os.path.isdir(os.path.join(base_path, f))
    ]

drive_folders = get_drive_folders(BASE_PATH)
drive_folders

['D3-Normal-motor', 'D3-Aggressive-motor']

In [3]:
def get_behavior(folder):
    name = folder.lower()
    if "drowsy" in name:
        return "drowsy"
    elif "aggressive" in name:
        return "aggressive"
    else:
        return "normal"

def get_road_type(folder):
    return "motor" if "motor" in folder.lower() else "secondary"

In [4]:
def load_gps(folder):
    gps = pd.read_csv(
        f"{folder}/RAW_GPS.txt",
        delim_whitespace=True,
        header=None,
        names=[
            "timestamp",      # 1
            "speed_kmh",      # 2
            "lat",            # 3
            "lon",            # 4
            "alt",            # 5
            "vert_acc",       # 6
            "horiz_acc",      # 7
            "course",         # 8
            "difcourse"       # 9
            "hdop",             # 10
            "vdop",             # 11
            "pdop"              # 12
        ]
    )

    gps["timestamp"] = gps["timestamp"].astype(int)
    return gps

In [5]:
def load_acc(folder):
    acc = pd.read_csv(
        f"{folder}/RAW_ACCELEROMETERS.txt",
        delim_whitespace=True,
        header=None,
        names=[
            "timestamp",          # 1
            "system_active",     # 2
            "acc_x",              # 3
            "acc_y",              # 4
            "acc_z",              # 5
            "acc_x_kf",           # 6
            "acc_y_kf",           # 7
            "acc_z_kf",           # 8
            "roll",               # 9
            "pitch",              # 10
            "yaw"                 # 11
        ]
    )

    acc["timestamp"] = acc["timestamp"].astype(int)
    return acc

In [6]:
def load_lane(folder):
    return pd.read_csv(
        f"{folder}/PROC_LANE_DETECTION.txt",
        delim_whitespace=True,
        header=None,
        names=[
            "timestamp",
            "x_lane",
            "phi_lane",
            "road_width",
            "lane_state"
        ]
    )

In [7]:
def load_vehicle(folder):
    return pd.read_csv(
        f"{folder}/PROC_VEHICLE_DETECTION.txt",
        delim_whitespace=True,
        header=None,
        names=[
            "timestamp",
            "dist_ahead_vehicle",
            "time_to_impact",
            "num_detected_vehicles",
            "gps_speed_kmh"
        ]
    )

In [8]:
def load_osm(folder):
    return pd.read_csv(
        f"{folder}/PROC_OPENSTREETMAP_DATA.txt",
        delim_whitespace=True,
        header=None,
        names=[
            "timestamp",
            "max_speed_kmh",
            "max_speed_reliability",
            "road_type",
            "num_lanes",
            "estimated_lane",
            "osm_lat",
            "osm_lon",
            "osm_query_delay",
            "gps_speed_kmh"
        ]
    )

In [9]:
def build_phase2_drive(folder):
    path = os.path.join(BASE_PATH, folder)

    # ---------- RAW_GPS (1 Hz) ----------
    gps = pd.read_csv(
        f"{path}/RAW_GPS.txt",
        sep=r"\s+",
        header=None
    )
    gps.columns = [
        "timestamp", "speed_kmh", "lat", "lon", "alt",
        "vert_acc", "horiz_acc", "course", "difcourse",
        "hdop", "vdop", "pdop"
    ]
    gps["t_sec"] = gps["timestamp"].astype(int)
    gps_1hz = gps.drop(columns=["timestamp"])


    # ---------- RAW_ACCELEROMETERS (10 Hz → 1 Hz) ----------
    acc = pd.read_csv(
        f"{path}/RAW_ACCELEROMETERS.txt",
        sep=r"\s+",
        header=None
    )
    acc.columns = [
        "timestamp", "active",
        "acc_x", "acc_y", "acc_z",
        "acc_x_kf", "acc_y_kf", "acc_z_kf",
        "roll", "pitch", "yaw"
    ]
    acc["t_sec"] = acc["timestamp"].astype(int)
    acc_1hz = (
        acc
        .drop(columns=["timestamp"])
        .groupby("t_sec")
        .mean()
        .reset_index()
    )


    # ---------- PROC_LANE_DETECTION (~30 Hz → 1 Hz) ----------
    lane = pd.read_csv(
        f"{path}/PROC_LANE_DETECTION.txt",
        sep=r"\s+",
        header=None
    )
    lane.columns = ["timestamp", "x_lane", "phi", "road_width", "lane_state"]
    lane["t_sec"] = lane["timestamp"].astype(int)
    lane_1hz = (
        lane
        .drop(columns=["timestamp"])
        .groupby("t_sec")
        .mean()
        .reset_index()
    )


    # ---------- PROC_VEHICLE_DETECTION (~10 Hz → 1 Hz) ----------
    veh = pd.read_csv(
        f"{path}/PROC_VEHICLE_DETECTION.txt",
        sep=r"\s+",
        header=None
    )
    veh.columns = [
        "timestamp", "dist_front", "ttc_front",
        "num_vehicles", "gps_speed"
    ]
    veh["t_sec"] = veh["timestamp"].astype(int)
    veh_1hz = (
        veh
        .drop(columns=["timestamp"])
        .groupby("t_sec")
        .mean()
        .reset_index()
    )


    # ---------- PROC_OPENSTREETMAP_DATA (~1 Hz) ----------
    osm = pd.read_csv(
        f"{path}/PROC_OPENSTREETMAP_DATA.txt",
        sep=r"\s+",
        header=None
    )
    osm.columns = [
        "timestamp", "max_speed", "speed_rel",
        "road_type_osm", "num_lanes", "lane_id",
        "lat_osm", "lon_osm", "osm_delay", "gps_speed_osm"
    ]
    osm["t_sec"] = osm["timestamp"].astype(int)
    osm_1hz = osm.drop(columns=["timestamp"])


    # ---------- MERGE ALL (aligned on t_sec) ----------
    data = gps_1hz.merge(acc_1hz, on="t_sec", how="inner")
    data = data.merge(lane_1hz, on="t_sec", how="inner")
    data = data.merge(veh_1hz, on="t_sec", how="inner")
    data = data.merge(osm_1hz, on="t_sec", how="inner")


    # ---------- LABELS ----------
    data["driver"] = folder.split("-")[0]
    data["behavior"] = folder.split("-")[1]
    data["road_type"] = folder.split("-")[2]

    # keep t_sec as first column for debugging / windowing
    cols = ["t_sec"] + [c for c in data.columns if c != "t_sec"]
    data = data[cols]


    return data


In [10]:
all_drives = []

for folder in drive_folders:
    print("Processing:", folder)
    df = build_phase2_drive(folder)
    all_drives.append(df)

driver1_phase2 = pd.concat(all_drives, ignore_index=True)


Processing: D3-Normal-motor
Processing: D3-Aggressive-motor


In [11]:
driver1_phase2.shape

(1050, 42)

In [12]:
driver1_phase2.to_csv("D3_phase2_LESS.csv", index=False)