# In [None]:
import pandas as pd
import sys

# Raw CAN log to read, and the decoded telemetry CSV to produce.
INPUT_FILE = "can_log_dataset.csv"
OUTPUT_FILE = "decoded_telemetry.csv"
# Number of rows read per chunk; keeps memory use bounded on large logs.
CHUNK_SIZE = 50_000

def hex_to_bytes(hex_str):
    """Parse a whitespace-separated hex string into a list of integers.

    Args:
        hex_str: Payload text such as ``"0A FF 10"``. Non-string values are
            converted with ``str()`` before parsing.

    Returns:
        One integer per whitespace-separated token. An empty list is
        returned when the value is missing (``pd.isna``) or when any token
        is not valid base-16 text.
    """
    if pd.isna(hex_str):
        return []

    parsed = []
    try:
        for token in str(hex_str).strip().split():
            parsed.append(int(token, 16))
    except ValueError:
        # A single bad token invalidates the whole payload.
        return []
    return parsed

def get_le_val(data, start, length=2, signed=False):
    """Assemble a little-endian integer from consecutive entries of *data*.

    Args:
        data: Indexable sequence of byte values.
        start: Index of the least-significant byte.
        length: Number of bytes to combine (default 2).
        signed: When true, interpret the result as two's complement of
            ``length * 8`` bits.

    Returns:
        The combined integer, or 0 when *data* is too short to hold
        ``length`` bytes starting at ``start``.
    """
    end = start + length
    if end > len(data):
        return 0

    result = 0
    for position, index in enumerate(range(start, end)):
        result |= data[index] << (position * 8)

    if not signed:
        return result

    # Values in the upper half of the range wrap around to negatives.
    modulus = 1 << (8 * length)
    return result - modulus if result >= modulus // 2 else result

# State carried across chunks while streaming the log.
last_values = None   # Last emitted row of the previous chunk (forward-fill context)
first_chunk = True   # True until the output file has been written once

# Open the log as a chunked reader so the whole file is never held in memory.
# sep=None together with the python engine asks pandas to detect the delimiter.
try:
    reader = pd.read_csv(
        INPUT_FILE,
        chunksize=CHUNK_SIZE,
        sep=None,
        engine="python",
        encoding="latin1",
        skipinitialspace=True,
    )
except FileNotFoundError:
    print(f"Error: {INPUT_FILE} not found.")
    sys.exit(1)

# Heartbeat frame IDs mapped to the output column that stores their status byte.
_STATUS_SIGNALS = {
    0x08: "Status_ECU", 0x09: "Status_BMS", 0x0A: "Status_TPS",
    0x0D: "Status_FW",  0x0F: "Status_RLW", 0x10: "Status_RRW",
}

# Wheel-speed frame IDs mapped to the wheel suffix used in the column name.
_WHEEL_POSITIONS = {0x34: "FL", 0x35: "FR", 0x36: "RL", 0x37: "RR"}


def _decode_frame(msg_id, data):
    """Decode one CAN frame into a ``{column_name: value}`` dict.

    Args:
        msg_id: Numeric frame identifier.
        data: Payload as a list of byte values.

    Returns:
        The signals carried by the frame. The dict is empty when the ID is
        not handled here or the payload is shorter than the frame requires.
    """
    signals = {}
    size = len(data)

    # Safety & Network
    if msg_id == 0x04 and size >= 1:
        signals["Safety_EStop"] = data[0]
    elif msg_id == 0x05 and size >= 1:
        signals["Safety_MotorsLocked"] = data[0]

    # Node Status Request
    elif msg_id == 0x07 and size >= 1:
        signals["Node_status_request"] = data[0]  # 0=All, or specific ID

    # Node Heartbeats
    elif msg_id in _STATUS_SIGNALS and size >= 1:
        signals[_STATUS_SIGNALS[msg_id]] = data[0]

    # --- 0x20: Throttle & Brake ---
    elif msg_id == 0x20 and size >= 5:
        # B0: Throttle %
        signals["Throttle_Pct"] = data[0] * 0.4
        # B2: Brake Pressure (Front/Default)
        signals["Brake_Pressure_Front_kPa"] = data[2] * 4.0
        # B3: Brake Pressure (Rear); a raw value of 255 is reported as 0.
        if data[3] != 255:
            signals["Brake_Pressure_Rear_kPa"] = data[3] * 4.0
        else:
            signals["Brake_Pressure_Rear_kPa"] = 0
        # B4: Brake Pedal %
        signals["Brake_Pedal_Pct"] = data[4] * 0.4

    # --- 0x22: Steering ---
    elif msg_id == 0x22 and size >= 2:
        # B0-B1: signed little-endian raw value, scaled by 0.1 and offset by -180.
        signals["Steering_Angle_Deg"] = get_le_val(data, 0, signed=True) * 0.1 - 180

    # --- 0x24: Battery Stats ---
    elif msg_id == 0x24 and size >= 8:  # Assuming 8 bytes for full frame
        # B0-B1: Voltage
        signals["Battery_Voltage_V"] = get_le_val(data, 0, 2) * 0.1

        # B2-B3: Current, -320 + (B3*256 + B2)*0.1
        raw_curr = get_le_val(data, 2, 2, signed=False)
        signals["Battery_Current_A"] = -320 + (raw_curr * 0.1)

        # B4: SOC
        signals["Battery_SOC_Pct"] = data[4]

        # B5-B6: Resistance
        # NOTE(review): the earlier comment cited "(B6*256+B5)*1000", but the
        # code scales the raw value by 0.000001 - confirm against the spec.
        raw_res = get_le_val(data, 5, 2, signed=False)
        signals["Battery_Resistance_Ohms"] = raw_res * 0.000001

        # Highest cell temp: 8th byte (index 7) minus 40.
        signals["Battery_Temp_Max_C"] = data[7] - 40.0

    # --- 0x25: Voltage Rails ---
    elif msg_id == 0x25 and size >= 3:
        # B1: 5V Rail
        signals["Voltage_5V_Rail_V"] = data[1] / 36.0
        # B2: 12V Rail
        signals["Voltage_12V_Rail_V"] = data[2] / 10.0

    # --- 0x30: Target Throttles ---
    elif msg_id == 0x30 and size >= 4:
        signals["Target_Thr_FL"] = data[0] * 0.4
        signals["Target_Thr_FR"] = data[1] * 0.4
        signals["Target_Thr_RL"] = data[2] * 0.4
        signals["Target_Thr_RR"] = data[3] * 0.4

    # --- 0x34-0x37: Wheel Speed ---
    elif msg_id in _WHEEL_POSITIONS and size >= 2:
        signals[f"Wheel_RPM_{_WHEEL_POSITIONS[msg_id]}"] = get_le_val(data, 0) / 30.0

    # --- 0x38: Vehicle Speed, (B1*256 + B0)/256 ---
    elif msg_id == 0x38 and size >= 2:
        raw_speed = get_le_val(data, 0, 2, signed=False)
        signals["Vehicle_Speed_kmh"] = raw_speed / 256.0

    return signals


def _append_rows(frame, write_header):
    """Format the ``Time`` column and write *frame* to ``OUTPUT_FILE``.

    Args:
        frame: Decoded rows whose ``Time`` column holds datetimes.
        write_header: True for the first write (creates/truncates the file
            and emits the header); False to append without a header.
    """
    # Work on a copy so the caller's frame (and any slice it came from) keeps
    # its datetime column untouched.
    frame = frame.copy()
    # Trim microseconds down to milliseconds.
    frame["Time"] = frame["Time"].dt.strftime('%H:%M:%S.%f').str[:-3]
    # NOTE(review): the header is written only once. If a later chunk adds a
    # signal column that was absent at the first write, its rows will carry
    # more fields than the header - confirm whether that can occur in practice.
    frame.to_csv(
        OUTPUT_FILE,
        mode="w" if write_header else "a",
        header=write_header,
        index=False,
    )


skipped_rows = 0  # Rows dropped because the frame ID could not be read

for chunk in reader:
    # 1. Clean Headers & Data
    chunk.columns = chunk.columns.str.strip()
    for col in chunk.select_dtypes(include="object"):
        chunk[col] = chunk[col].str.strip()

    # 2. Time Sort (chronological order within the chunk). A stable sort keeps
    # frames that share a timestamp in their logged order, so the forward fill
    # below sees them in the order they were recorded.
    chunk["Timestamp"] = pd.to_datetime(chunk["Time scale"], errors="coerce")
    chunk = chunk.dropna(subset=["Timestamp"]).sort_values(
        "Timestamp", kind="mergesort"
    )

    decoded_rows = []

    # 3. Decode Frames
    for _, row in chunk.iterrows():
        try:
            msg_id = int(str(row["Frame Id"]), 16)
            data = hex_to_bytes(row["Data(Hex)"])
        except (KeyError, ValueError):
            # Best effort: skip the row, but count it so it can be reported.
            skipped_rows += 1
            continue

        signals = _decode_frame(msg_id, data)
        if signals:
            decoded_rows.append({"Time": row["Timestamp"], **signals})

    if not decoded_rows:
        continue

    out_df = pd.DataFrame(decoded_rows)
    out_df = out_df.sort_values("Time", kind="mergesort").reset_index(drop=True)

    # Continuity: prepend the row held back from the previous chunk so its
    # values seed the forward fill.
    if last_values is not None:
        out_df = pd.concat([last_values, out_df], ignore_index=True)

    # Forward-fill each signal, then keep only rows where every column is known.
    out_df = out_df.ffill()
    out_df = out_df.dropna(how="any")

    # Hold back the last row as context for the next chunk; it is written
    # together with that chunk (or by the flush after the loop).
    last_values = out_df.tail(1)
    out_df = out_df.iloc[:-1]

    if not out_df.empty:
        _append_rows(out_df, first_chunk)
        first_chunk = False

# The row held back from the final chunk has no successor to be written with,
# so emit it now; otherwise the last decoded sample would be lost.
if last_values is not None and not last_values.empty:
    _append_rows(last_values, first_chunk)
    first_chunk = False

if skipped_rows:
    print(f"Warning: skipped {skipped_rows} row(s) whose frame ID could not be read.")

print(" Complete! Data is decoded, chronologically sorted, and saved.")
print(" File:", OUTPUT_FILE)