# CAN BUS decoding script (To be used in pipeline)

In [None]:
import pandas as pd
import sys
from typing import List, Dict


In [None]:
import pandas as pd
import sys

INPUT_FILE = "can_log_dataset.csv"
OUTPUT_FILE = "decoded_telemetry.csv"
CHUNK_SIZE = 50000  # Memory saver

def hex_to_bytes(hex_str):
    """Parse a whitespace-separated hex string into a list of integers.

    Every token is interpreted as base-16. A missing value (as judged by
    ``pd.isna``) or a string containing any unparsable token yields an
    empty list instead of raising.
    """
    if pd.isna(hex_str):
        return []

    parsed = []
    try:
        for token in str(hex_str).strip().split():
            parsed.append(int(token, 16))
    except ValueError:
        # One bad token invalidates the whole payload.
        return []
    return parsed

def get_le_val(data, start, length=2, signed=False):
    """Combine ``length`` little-endian bytes of ``data`` starting at ``start``.

    Returns 0 when ``data`` does not contain ``start + length`` elements.
    With ``signed=True`` the assembled value is reinterpreted as a two's
    complement number that is ``length * 8`` bits wide.
    """
    end = start + length
    if end > len(data):
        return 0

    val = 0
    shift = 0
    for pos in range(start, end):
        # Lowest-addressed byte lands in the least significant position.
        val |= data[pos] << shift
        shift += 8

    if not signed:
        return val

    span = 1 << (length * 8)
    return val - span if val >= span // 2 else val

# Heartbeat frame IDs and the status column each one populates.
STATUS_COLUMNS = {
    0x08: "Status_ECU", 0x09: "Status_BMS", 0x0A: "Status_TPS",
    0x0D: "Status_FW", 0x0F: "Status_RLW", 0x10: "Status_RRW",
}

# Wheel-speed frame IDs and the wheel suffix used in the column name.
WHEEL_POSITIONS = {0x34: "FL", 0x35: "FR", 0x36: "RL", 0x37: "RR"}


def _decode_frame(msg_id, data):
    """Translate one frame into a dict of named signal values.

    Args:
        msg_id: Frame identifier as an integer.
        data: Payload as a list of byte values (as produced by
            ``hex_to_bytes``).

    Returns:
        A dict mapping output column names to decoded values. The dict is
        empty when the ID is not handled or the payload is shorter than the
        layout for that ID requires.
    """
    signals = {}
    size = len(data)

    # Safety & network
    if msg_id == 0x04 and size >= 1:
        signals["Safety_EStop"] = data[0]
    elif msg_id == 0x05 and size >= 1:
        signals["Safety_MotorsLocked"] = data[0]

    # Node status request (0 = all, otherwise a specific ID)
    elif msg_id == 0x07 and size >= 1:
        signals["Node_status_request"] = data[0]

    # Node heartbeats
    elif msg_id in STATUS_COLUMNS and size >= 1:
        signals[STATUS_COLUMNS[msg_id]] = data[0]

    # 0x20: throttle & brake
    elif msg_id == 0x20 and size >= 5:
        signals["Throttle_Pct"] = data[0] * 0.4              # B0
        signals["Brake_Pressure_Front_kPa"] = data[2] * 4.0  # B2
        # B3 == 255 is reported as 0 rather than scaled.
        # NOTE(review): presumably 255 marks "rear sensor unused" - confirm.
        signals["Brake_Pressure_Rear_kPa"] = data[3] * 4.0 if data[3] != 255 else 0
        signals["Brake_Pedal_Pct"] = data[4] * 0.4           # B4

    # 0x22: steering
    elif msg_id == 0x22 and size >= 2:
        signals["Steering_Angle_Deg"] = get_le_val(data, 0, signed=True) * 0.1 - 180

    # 0x24: battery stats (needs the full 8-byte frame)
    elif msg_id == 0x24 and size >= 8:
        signals["Battery_Voltage_V"] = get_le_val(data, 0, 2) * 0.1                       # B0-B1
        signals["Battery_Current_A"] = -320 + (get_le_val(data, 2, 2, signed=False) * 0.1)  # B2-B3
        signals["Battery_SOC_Pct"] = data[4]                                              # B4
        # NOTE(review): the earlier note on this field quoted a x1000 factor
        # while the code scales the raw value by 1e-6 - confirm which is right.
        signals["Battery_Resistance_Ohms"] = get_le_val(data, 5, 2, signed=False) * 0.000001  # B5-B6
        signals["Battery_Temp_Max_C"] = data[7] - 40.0                                    # index 7

    # 0x25: voltage rails
    elif msg_id == 0x25 and size >= 3:
        signals["Voltage_5V_Rail_V"] = data[1] / 36.0
        signals["Voltage_12V_Rail_V"] = data[2] / 10.0

    # 0x30: target throttles
    elif msg_id == 0x30 and size >= 4:
        signals["Target_Thr_FL"] = data[0] * 0.4
        signals["Target_Thr_FR"] = data[1] * 0.4
        signals["Target_Thr_RL"] = data[2] * 0.4
        signals["Target_Thr_RR"] = data[3] * 0.4

    # 0x34-0x37: wheel speed
    elif msg_id in WHEEL_POSITIONS and size >= 2:
        signals[f"Wheel_RPM_{WHEEL_POSITIONS[msg_id]}"] = get_le_val(data, 0) / 30.0

    # 0x38: vehicle speed
    elif msg_id == 0x38 and size >= 2:
        signals["Vehicle_Speed_kmh"] = get_le_val(data, 0, 2, signed=False) / 256.0

    return signals


first_chunk = True
last_values = None      # Last forward-filled row of the previous chunk (ffill context)
output_columns = None   # Column layout fixed by the header row of OUTPUT_FILE
late_columns = set()    # Signals first seen after the header was written
skipped_frames = 0      # Rows whose frame ID / payload could not be read

try:
    reader = pd.read_csv(
        INPUT_FILE,
        encoding="latin1",
        sep=None,
        engine="python",
        skipinitialspace=True,
        chunksize=CHUNK_SIZE
    )
except FileNotFoundError:
    print(f"Error: {INPUT_FILE} not found.")
    sys.exit(1)

for chunk in reader:
    # Clean headers & string cells
    chunk.columns = chunk.columns.str.strip()
    for col in chunk.select_dtypes(include="object"):
        chunk[col] = chunk[col].str.strip()

    # Chronological order within the chunk. A stable sort keeps frames that
    # share a timestamp in file order, which matters for the forward fill.
    # Ordering across chunks still relies on the input file itself.
    chunk["Timestamp"] = pd.to_datetime(chunk["Time scale"], errors="coerce")
    chunk = chunk.dropna(subset=["Timestamp"]).sort_values("Timestamp", kind="stable")

    # Decode frames (rows come out already ordered by time)
    decoded_rows = []
    for _, row in chunk.iterrows():
        try:
            msg_id = int(str(row["Frame Id"]), 16)
            data = hex_to_bytes(row["Data(Hex)"])
        except (KeyError, TypeError, ValueError):
            # Best effort: skip the unreadable frame, but keep count so the
            # loss is reported instead of silently swallowed.
            skipped_frames += 1
            continue

        signals = _decode_frame(msg_id, data)
        if signals:
            decoded_rows.append({"Time": row["Timestamp"], **signals})

    if not decoded_rows:
        continue

    out_df = pd.DataFrame(decoded_rows)

    # Continuity: seed the forward fill with the previous chunk's last state.
    has_context = last_values is not None
    if has_context:
        out_df = pd.concat([last_values, out_df], ignore_index=True)

    out_df = out_df.ffill()

    # Remember the latest state *before* dropping incomplete rows, so partial
    # context survives even when this chunk produced no complete row.
    last_values = out_df.tail(1)

    # The context row was already handled with its own chunk; dropping it here
    # means every row is written exactly once and nothing is held back at EOF.
    if has_context:
        out_df = out_df.iloc[1:]

    # Rows appended later must match the header that is already on disk.
    if output_columns is not None:
        late_columns.update(c for c in out_df.columns if c not in output_columns)
        out_df = out_df.reindex(columns=output_columns)

    out_df = out_df.dropna(how="any")
    if out_df.empty:
        continue

    if output_columns is None:
        output_columns = list(out_df.columns)

    out_df = out_df.copy()  # own the data before rewriting the Time column
    out_df["Time"] = out_df["Time"].dt.strftime('%H:%M:%S.%f').str[:-3]

    out_df.to_csv(
        OUTPUT_FILE,
        mode="w" if first_chunk else "a",
        header=first_chunk,
        index=False
    )
    first_chunk = False

if skipped_frames:
    print(f"Warning: skipped {skipped_frames} frame(s) with an unreadable ID or payload.")
if late_columns:
    print("Warning: signals first seen after the header was written were not saved:",
          sorted(late_columns))

if first_chunk:
    print("Warning: no complete telemetry rows were decoded; nothing was written.")
else:
    print(" Complete! Data is decoded, chronologically sorted, and saved.")
    print(" File:", OUTPUT_FILE)

  chunk["Timestamp"] = pd.to_datetime(chunk["Time scale"], errors="coerce")


 Complete! Data is decoded, chronologically sorted, and saved.
 File: decoded_telemetry.csv


# Model Training (Isolation forest)

In [None]:
import pandas as pd
import numpy as np
import joblib

from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Train an Isolation Forest on the exported feature table and persist the
# model, scaler, feature order and decision threshold as one bundle.
INPUT_FILE = "features_only.csv"
MODEL_FILE = "isolation_forest_fixed.pkl"


df = pd.read_csv(INPUT_FILE)

print("Shape:", df.shape)
print("Columns:", df.columns.tolist())


# Train only on rows that are finite and fully populated.
X = df.replace([np.inf, -np.inf], np.nan).dropna()

# Keep df row-aligned with X; otherwise assigning the score column below
# fails with a length mismatch as soon as any row was dropped.
df = df.loc[X.index].copy()

print("Training samples:", len(X))


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


iso = IsolationForest(
    n_estimators=200,
    contamination=0.05,   # 5% anomalies
    max_samples="auto",
    random_state=42,
)

iso.fit(X_scaled)

# score_samples: lower = more anomalous. Negate so that a larger value
# means "more anomalous".
raw_scores = iso.score_samples(X_scaled)
anomaly_score = -raw_scores

df["if_score"] = anomaly_score


# Flag the top 5% of scores, matching the contamination setting above.
threshold = np.percentile(anomaly_score, 95)

df["if_anomaly"] = (df["if_score"] > threshold).astype(int)

print("\nThreshold:", threshold)
print("Anomalies detected:", df["if_anomaly"].sum())

model_bundle = {
    "model": iso,
    "scaler": scaler,
    "features": X.columns.tolist(),
    "threshold": threshold
}

joblib.dump(model_bundle, MODEL_FILE)

print("\n✅ Model saved:", MODEL_FILE)

Shape: (1939, 17)
Columns: ['Vehicle_Speed_kmh', 'Vehicle_Speed_RPM_mean', 'Wheel_RPM_std', 'Battery_Voltage_V', 'Battery_Current_A', 'Battery_Power_W', 'Power_per_Speed', 'Battery_SOC_Pct', 'Battery_Resistance_Ohms', 'Battery_Temp_Max_C', 'Temp_Stress_Index', 'Throttle_Pct', 'Torque_Command_Error', 'Brake_Pedal_Pct', 'Brake_Pressure_Front_kPa', 'Brake_Pressure_Rear_kPa', 'Brake_Pressure_Diff']
Training samples: 1939

Threshold: 0.5991845223399249
Anomalies detected: 39

✅ Model saved: isolation_forest_fixed.pkl


In [None]:
import numpy as np
import joblib


# Smoke test: load the saved Isolation Forest bundle and score one synthetic
# sample with an extreme battery current.
# NOTE: joblib.load unpickles the file - only load bundles you created yourself.
model_bundle = joblib.load("isolation_forest_fixed.pkl")

model, scaler = model_bundle["model"], model_bundle["scaler"]
FEATURES, threshold = model_bundle["features"], model_bundle["threshold"]

print("Loaded features:", FEATURES)
print("Threshold:", threshold)


# Generic baseline: every feature set to 1.0.
sample = np.ones((1, len(FEATURES)))

test_sample = sample.copy()

# Spike the battery current when that feature is part of the model.
if "Battery_Current_A" in FEATURES:
    idx = FEATURES.index("Battery_Current_A")
    test_sample[0, idx] = 1000  # extreme anomaly

print("\n===== TEST SAMPLE =====")
for f, v in zip(FEATURES, test_sample[0]):
    print(f"{f}: {v}")


X_scaled = scaler.transform(test_sample)

# Same sign convention as training: negated score_samples output.
anomaly_score = -model.score_samples(X_scaled)[0]


print("\n===== RESULT =====")

verdict = "Anomaly detected!" if anomaly_score > threshold else "Normal"
print(verdict)

print("Anomaly score:", anomaly_score)
print("Threshold:", threshold)

# Ratio of score to threshold; the epsilon avoids division by zero.
confidence = anomaly_score / (threshold + 1e-6)
print("Confidence:", confidence)

Loaded features: ['Vehicle_Speed_kmh', 'Vehicle_Speed_RPM_mean', 'Wheel_RPM_std', 'Battery_Voltage_V', 'Battery_Current_A', 'Battery_Power_W', 'Power_per_Speed', 'Battery_SOC_Pct', 'Battery_Resistance_Ohms', 'Battery_Temp_Max_C', 'Temp_Stress_Index', 'Throttle_Pct', 'Torque_Command_Error', 'Brake_Pedal_Pct', 'Brake_Pressure_Front_kPa', 'Brake_Pressure_Rear_kPa', 'Brake_Pressure_Diff']
Threshold: 0.5991845223399249

===== TEST SAMPLE =====
Vehicle_Speed_kmh: 1.0
Vehicle_Speed_RPM_mean: 1.0
Wheel_RPM_std: 1.0
Battery_Voltage_V: 1.0
Battery_Current_A: 1000.0
Battery_Power_W: 1.0
Power_per_Speed: 1.0
Battery_SOC_Pct: 1.0
Battery_Resistance_Ohms: 1.0
Battery_Temp_Max_C: 1.0
Temp_Stress_Index: 1.0
Throttle_Pct: 1.0
Torque_Command_Error: 1.0
Brake_Pedal_Pct: 1.0
Brake_Pressure_Front_kPa: 1.0
Brake_Pressure_Rear_kPa: 1.0
Brake_Pressure_Diff: 1.0

===== RESULT =====
Anomaly detected!
Anomaly score: 0.7744913748748863
Threshold: 0.5991845223399249
Confidence: 1.292573578631141




In [None]:
import joblib
import numpy as np

# Export the trained Isolation Forest bundle as a C header (iforest_model.h).
# NOTE: joblib.load unpickles the file - only load bundles you created yourself.
bundle = joblib.load("isolation_forest_fixed.pkl")

iso = bundle["model"]
scaler = bundle["scaler"]
features = bundle["features"]
threshold = bundle["threshold"]


def _c_array(declaration, values):
    """Return a one-line C array definition: ``<declaration> = {v0,v1,...};``."""
    return f"{declaration} = {{" + ",".join(map(str, values)) + "};\n"


# NOTE(review): the arrays are emitted as plain (non-static, non-const)
# definitions, exactly as before, so existing consumers keep compiling.
# Including the header from more than one translation unit would need
# `static const` - confirm with the firmware side before changing it.
parts = [
    "#ifndef IFOREST_MODEL_H\n",
    "#define IFOREST_MODEL_H\n\n",
    f"#define NUM_FEATURES {len(features)}\n",
    f"#define NUM_TREES {len(iso.estimators_)}\n",
    # Sub-sample size used per tree. scikit-learn normalises the mean path
    # length by c(max_samples_), so a score comparable with IF_THRESHOLD
    # cannot be computed without it.
    f"#define IF_MAX_SAMPLES {iso.max_samples_}\n\n",
    "// Scaler mean\n",
    _c_array("float scaler_mean[NUM_FEATURES]", scaler.mean_),
    "\n",
    "// Scaler scale\n",
    _c_array("float scaler_scale[NUM_FEATURES]", scaler.scale_),
    "\n",
    f"float IF_THRESHOLD = {threshold};\n\n",
]

node_counts = []
for i, est in enumerate(iso.estimators_):
    tree = est.tree_
    node_counts.append(tree.node_count)

    parts.append(f"// ===== TREE {i} =====\n")
    parts.append(_c_array(f"int tree_{i}_feature[]", tree.feature))
    parts.append(_c_array(f"float tree_{i}_threshold[]", tree.threshold))
    parts.append(_c_array(f"int tree_{i}_left[]", tree.children_left))
    parts.append(_c_array(f"int tree_{i}_right[]", tree.children_right))
    # Training samples that reached each node. scikit-learn adds
    # c(n_node_samples[leaf]) to the path length of a sample ending in
    # that leaf, so the device needs these to reproduce the score.
    parts.append(_c_array(f"int tree_{i}_n_node_samples[]", tree.n_node_samples))
    parts.append("\n")

parts.append("// Number of nodes in each tree\n")
parts.append(_c_array("int tree_node_count[NUM_TREES]", node_counts))
parts.append("\n")
parts.append("#endif\n")

# Single write instead of hundreds of tiny ones.
with open("iforest_model.h", "w") as f:
    f.write("".join(parts))

print("Header file generated: iforest_model.h")

✅ Header file generated: iforest_model.h


# K-means clustering

In [None]:
import pandas as pd
import numpy as np
import joblib

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler


# Cluster the decoded telemetry with K-means, flag points far from their
# centroid, and persist the model, scaler, feature order and threshold.
INPUT_FILE = "decoded_telemetry.csv"
OUTPUT_FILE = "telemetry_with_kmeans.csv"
MODEL_FILE = "kmeans_model.pkl"


df = pd.read_csv(INPUT_FILE, parse_dates=["Time"])
df = df.sort_values("Time").reset_index(drop=True)

print("Shape:", df.shape)

# Feature engineering
rpm_cols = [c for c in df.columns if "Wheel_RPM" in c]

df["Vehicle_Speed_RPM_mean"] = df[rpm_cols].mean(axis=1)
df["Wheel_RPM_std"] = df[rpm_cols].std(axis=1)

df["Battery_Power_W"] = df["Battery_Voltage_V"] * df["Battery_Current_A"]

# The small constant keeps the ratio finite when the speed is zero.
df["Power_per_Speed"] = df["Battery_Power_W"] / (
    df["Vehicle_Speed_kmh"] + 1e-3
)

thr_cols = ["Target_Thr_FL", "Target_Thr_FR", "Target_Thr_RL", "Target_Thr_RR"]
df["Target_Thr_mean"] = df[thr_cols].mean(axis=1)

df["Torque_Command_Error"] = (
    df["Target_Thr_mean"] - df["Throttle_Pct"]
).abs()

df["Brake_Pressure_Diff"] = (
    df["Brake_Pressure_Front_kPa"] -
    df["Brake_Pressure_Rear_kPa"]
).abs()

df["Temp_Stress_Index"] = (
    df["Battery_Temp_Max_C"] *
    df["Battery_Current_A"].abs()
)


FEATURES = [
    "Vehicle_Speed_kmh",
    "Vehicle_Speed_RPM_mean",
    "Wheel_RPM_std",
    "Battery_Voltage_V",
    "Battery_Current_A",
    "Battery_Power_W",
    "Power_per_Speed",
    "Battery_SOC_Pct",
    "Battery_Resistance_Ohms",
    "Battery_Temp_Max_C",
    "Temp_Stress_Index",
    "Throttle_Pct",
    "Torque_Command_Error",
    "Brake_Pedal_Pct",
    "Brake_Pressure_Front_kPa",
    "Brake_Pressure_Rear_kPa",
    "Brake_Pressure_Diff"
]

# Only features that are actually present can be used.
FEATURES = [c for c in FEATURES if c in df.columns]

X = df[FEATURES].dropna()
if X.empty:
    # Fail with a clear message instead of an opaque scaler error.
    raise ValueError(
        f"No complete feature rows in {INPUT_FILE}; cannot train K-means."
    )
# Copy so the columns added below go into a real frame, not a slice.
df = df.loc[X.index].copy()

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# K cannot usefully exceed the number of distinct points, and the silhouette
# score needs fewer clusters than samples.
n_distinct = len(np.unique(X_scaled, axis=0))
max_k = min(5, n_distinct, len(X) - 1)
if max_k < 2:
    raise ValueError(
        "Need at least 2 distinct feature rows (and 3 samples) to cluster."
    )

K_RANGE = range(2, max_k + 1)
scores = {}

for k in K_RANGE:
    km = KMeans(n_clusters=k, random_state=42, n_init=20)
    labels = km.fit_predict(X_scaled)
    scores[k] = silhouette_score(X_scaled, labels)

best_k = max(scores, key=scores.get)

print("\nBest K:", best_k)
print("Scores:", scores)

kmeans = KMeans(n_clusters=best_k, random_state=42, n_init=30)
df["cluster"] = kmeans.fit_predict(X_scaled)


centroids = kmeans.cluster_centers_

# Distance of every sample to the centroid of its own cluster.
distances = np.linalg.norm(
    X_scaled - centroids[df["cluster"].to_numpy()],
    axis=1
)

df["kmeans_distance"] = distances

threshold = np.percentile(distances, 95)

df["kmeans_anomaly"] = (distances > threshold).astype(int)

print("\nThreshold:", threshold)
print("Anomalies:", df["kmeans_anomaly"].sum())

if np.isclose(threshold, 0.0):
    # Nearly every training point coincides with a centroid (for example
    # only a handful of distinct rows), so any unseen sample will exceed
    # this threshold.
    print("Warning: threshold is numerically zero; the training data looks "
          "degenerate and new samples will almost always be flagged.")

df.to_csv(OUTPUT_FILE, index=False)

model_bundle = {
    "kmeans": kmeans,
    "scaler": scaler,
    "features": FEATURES,
    "threshold": threshold
}

joblib.dump(model_bundle, MODEL_FILE)

print("\nModel saved as:", MODEL_FILE)

  df = pd.read_csv(INPUT_FILE, parse_dates=["Time"])


Shape: (1939, 28)

Best K: 5
Scores: {2: np.float64(0.9193560905206294), 3: np.float64(0.9529832783569747), 4: np.float64(0.99870644744024), 5: np.float64(1.0)}

Threshold: 1.2889857856405219e-14
Anomalies: 0

✅ Model saved as: kmeans_model.pkl


In [None]:
import numpy as np
import joblib


# Smoke test: load the saved K-means bundle and check one synthetic sample
# with an extreme battery current against the distance threshold.
# NOTE: joblib.load unpickles the file - only load bundles you created yourself.
model_bundle = joblib.load("kmeans_model.pkl")

kmeans = model_bundle["kmeans"]
scaler = model_bundle["scaler"]
FEATURES = model_bundle["features"]
threshold = model_bundle["threshold"]

print("Loaded features:", FEATURES)
print("Threshold:", threshold)

# Baseline values keyed by feature name, so the sample always follows the
# feature order stored in the bundle even if the trained list was filtered
# or reordered.
BASELINE = {
    "Vehicle_Speed_kmh": 10,
    "Vehicle_Speed_RPM_mean": 100,
    "Wheel_RPM_std": 5,
    "Battery_Voltage_V": 48,
    "Battery_Current_A": 10,
    "Battery_Power_W": 480,
    "Power_per_Speed": 40,
    "Battery_SOC_Pct": 80,
    "Battery_Resistance_Ohms": 0.01,
    "Battery_Temp_Max_C": 35,
    "Temp_Stress_Index": 350,
    "Throttle_Pct": 20,
    "Torque_Command_Error": 2,
    "Brake_Pedal_Pct": 10,
    "Brake_Pressure_Front_kPa": 200,
    "Brake_Pressure_Rear_kPa": 180,
    "Brake_Pressure_Diff": 20,
}

missing = [name for name in FEATURES if name not in BASELINE]
if missing:
    raise KeyError(f"No baseline value defined for feature(s): {missing}")

sample = np.array([[BASELINE[name] for name in FEATURES]], dtype=float)


test_sample = sample.copy()

# Spike the battery current when that feature is part of the model.
if "Battery_Current_A" in FEATURES:
    test_sample[0, FEATURES.index("Battery_Current_A")] = 1000

print("\n===== TEST SAMPLE =====")
for f, v in zip(FEATURES, test_sample[0]):
    print(f"{f}: {v}")


X_scaled = scaler.transform(test_sample)


# Distance from the sample to the centroid of the cluster it is assigned to.
cluster = kmeans.predict(X_scaled)[0]
centroid = kmeans.cluster_centers_[cluster]
distance = np.linalg.norm(X_scaled[0] - centroid)


print("\n===== RESULT =====")

if distance > threshold:
    print("Anomaly detected!")
else:
    print("Normal")

print("Cluster:", cluster)
print("Distance:", distance)
print("Threshold:", threshold)

# Ratio of distance to threshold; the epsilon avoids division by zero.
confidence = distance / (threshold + 1e-6)
print("Confidence:", confidence)

Loaded features: ['Vehicle_Speed_kmh', 'Vehicle_Speed_RPM_mean', 'Wheel_RPM_std', 'Battery_Voltage_V', 'Battery_Current_A', 'Battery_Power_W', 'Power_per_Speed', 'Battery_SOC_Pct', 'Battery_Resistance_Ohms', 'Battery_Temp_Max_C', 'Temp_Stress_Index', 'Throttle_Pct', 'Torque_Command_Error', 'Brake_Pedal_Pct', 'Brake_Pressure_Front_kPa', 'Brake_Pressure_Rear_kPa', 'Brake_Pressure_Diff']
Threshold: 1.2889857856405219e-14

===== TEST SAMPLE =====
Vehicle_Speed_kmh: 10.0
Vehicle_Speed_RPM_mean: 100.0
Wheel_RPM_std: 5.0
Battery_Voltage_V: 48.0
Battery_Current_A: 1000.0
Battery_Power_W: 480.0
Power_per_Speed: 40.0
Battery_SOC_Pct: 80.0
Battery_Resistance_Ohms: 0.01
Battery_Temp_Max_C: 35.0
Temp_Stress_Index: 350.0
Throttle_Pct: 20.0
Torque_Command_Error: 2.0
Brake_Pedal_Pct: 10.0
Brake_Pressure_Front_kPa: 200.0
Brake_Pressure_Rear_kPa: 180.0
Brake_Pressure_Diff: 20.0

===== RESULT =====
Anomaly detected!
Cluster: 4
Distance: 1359.7224949887225
Threshold: 1.2889857856405219e-14
Confidence: 135



In [None]:
# Write the feature columns to features_only.csv without the index.
# Relies on `df` and `FEATURES` already being defined in the session.
# NOTE(review): presumably these are the ones left by the K-means cells - confirm.
feature_frame = df[FEATURES]
feature_frame.to_csv("features_only.csv", index=False)