In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive; the input
# folder and output CSV used by this notebook are paths under that mount.
from google.colab import drive
drive.mount('/content/drive')

In [13]:
import json
import numpy as np
import pandas as pd
import os

def extract_frame_features(frame):
    """Extract per-frame spatial features from mmWave frame data.

    Args:
        frame: Mapping with a "frameData" entry (holding "pointCloud" and,
            optionally, "frameNum") and an optional "timestamp" entry. The
            first five columns of every point-cloud row are read as
            x, y, z, doppler, snr, in that order.

    Returns:
        A dict with the frame number, timestamp, point count, and the
        mean / variance / bounding-box statistics of the point cloud, or
        None when the frame carries no usable point cloud (missing, empty,
        ragged, not two-dimensional, or fewer than five columns per point).
    """
    frame_data = frame.get("frameData") or {}
    if not isinstance(frame_data, dict):
        return None

    try:
        points = np.asarray(frame_data.get("pointCloud") or [])
    except ValueError:
        # Rows of unequal length cannot form a rectangular array.
        return None

    # Column slicing below needs a (num_points, >=5) array; anything else is
    # treated like an empty frame instead of raising IndexError.
    if points.size == 0 or points.ndim != 2 or points.shape[1] < 5:
        return None

    x, y, z, doppler, snr = points[:, :5].T

    return {
        "frameNum": frame_data.get("frameNum"),
        "timestamp": frame.get("timestamp"),
        "num_points": len(points),
        "mean_x": np.mean(x),  "mean_y": np.mean(y),  "mean_z": np.mean(z),
        "var_x": np.var(x),    "var_y": np.var(y),    "var_z": np.var(z),
        "mean_doppler": np.mean(doppler),
        "var_doppler": np.var(doppler),
        "mean_snr": np.mean(snr),
        "var_snr": np.var(snr),
        "bbox_x_min": np.min(x), "bbox_x_max": np.max(x),
        "bbox_y_min": np.min(y), "bbox_y_max": np.max(y),
        "bbox_z_min": np.min(z), "bbox_z_max": np.max(z),
    }


def compute_velocity_acceleration(df):
    """Add finite-difference velocity and acceleration columns.

    Rows are ordered by "frameNum". For each axis the difference between
    consecutive "mean_<axis>" values is divided by the time step to give
    "vel_<axis>", and the same is done on the velocities to give
    "acc_<axis>". "vel_mag" and "acc_mag" hold the Euclidean norms.

    Args:
        df: Per-frame DataFrame with "frameNum", "timestamp", "mean_x",
            "mean_y" and "mean_z" columns.

    Returns:
        A new, re-indexed DataFrame sorted by "frameNum" with the extra
        columns appended; the input frame is not modified.
    """
    ordered = df.sort_values("frameNum").reset_index(drop=True)

    # Timestamps are divided by 1000 (presumably milliseconds -> seconds).
    stamps = ordered["timestamp"]
    elapsed = np.diff(stamps, prepend=stamps.iloc[0]) / 1000.0
    # A zero step (always the first row, plus any repeated timestamp) is
    # replaced by a tiny positive value so the divisions stay finite.
    elapsed = np.where(elapsed == 0, 1e-6, elapsed)

    for axis in ("x", "y", "z"):
        velocity = ordered[f"mean_{axis}"].diff().fillna(0) / elapsed
        ordered[f"vel_{axis}"] = velocity
        ordered[f"acc_{axis}"] = velocity.diff().fillna(0) / elapsed

    for kind in ("vel", "acc"):
        cx, cy, cz = (ordered[f"{kind}_{axis}"] for axis in ("x", "y", "z"))
        ordered[f"{kind}_mag"] = np.sqrt(cx ** 2 + cy ** 2 + cz ** 2)

    return ordered


def aggregate_features(df, window_seconds=1.0):
    """Summarise per-frame features over consecutive fixed-length windows.

    Frames are ordered by "timestamp" and bucketed by the time elapsed since
    the first frame. Each non-empty bucket becomes one output row holding
    mean/std (and max for the magnitude columns) of the selected features,
    plus the smallest and largest timestamp that fell into the bucket.

    Args:
        df: Per-frame DataFrame with "timestamp", "num_points", "mean_x",
            "mean_y", "mean_z", "vel_mag", "acc_mag", "mean_doppler" and
            "mean_snr" columns.
        window_seconds: Bucket length in seconds.

    Returns:
        DataFrame with one row per non-empty window and columns named
        "<feature>_<stat>", followed by "window_start_ms" and
        "window_end_ms".
    """
    ordered = df.sort_values("timestamp").reset_index(drop=True)

    elapsed_s = (ordered["timestamp"] - ordered["timestamp"].iloc[0]) / 1000.0
    ordered["time_window"] = (elapsed_s // window_seconds).astype(int)

    # Insertion order of this mapping fixes the output column order.
    spec = {}
    for column in ("num_points", "mean_x", "mean_y", "mean_z"):
        spec[column] = ["mean", "std"]
    for column in ("vel_mag", "acc_mag"):
        spec[column] = ["mean", "max", "std"]
    for column in ("mean_doppler", "mean_snr"):
        spec[column] = ["mean", "std"]

    grouped = ordered.groupby("time_window")

    summary = grouped.agg(spec)
    # Flatten the (feature, stat) column pairs into "feature_stat" names.
    summary.columns = ["_".join(pair).strip() for pair in summary.columns]
    summary = summary.reset_index(drop=True)

    bounds = grouped["timestamp"].agg(["min", "max"]).reset_index(drop=True)
    summary["window_start_ms"] = bounds["min"]
    summary["window_end_ms"] = bounds["max"]

    return summary


def extract_to_csv(input_file, output_csv, label_value, window_seconds=1.0):
    """Extract + aggregate + append data from 1 JSON file."""

    # Skip files that are not valid JSON
    try:
        with open(input_file, "r") as f:
            data = json.load(f)
    except:
        print(f"Skipping (not a JSON file or corrupted): {input_file}")
        return

    frames = data.get("data", [])
    rows = [extract_frame_features(f) for f in frames if extract_frame_features(f)]

    if not rows:
        print(f" No valid frames in {input_file}")
        return

    df = pd.DataFrame(rows)
    df = compute_velocity_acceleration(df)
    agg_df = aggregate_features(df, window_seconds)

    # Add label from folder
    agg_df["result"] = label_value

    file_has_data = os.path.exists(output_csv) and os.path.getsize(output_csv) > 0

    agg_df.to_csv(output_csv, mode="a", header=not file_has_data, index=False)

    print(f" Processed {input_file} → {len(agg_df)} aggregated rows")

def process_files_in_subfolders(root_folder, csv_filepath, window_seconds=0.5):
    """Convert every JSON file below ``root_folder`` into rows of one CSV.

    Each directory beneath ``root_folder`` is treated as a label: its base
    name is passed as ``label_value`` for every ``.json`` file it contains.
    Files sitting directly in ``root_folder`` are ignored.

    Args:
        root_folder: Directory whose subfolders hold the JSON recordings.
        csv_filepath: CSV that receives the aggregated rows (appended to if
            it already has content).
        window_seconds: Aggregation window length, in seconds, forwarded to
            ``extract_to_csv``.

    Returns:
        None.
    """
    # os.walk silently yields nothing for a missing directory; say so rather
    # than let the caller believe files were processed.
    if not os.path.isdir(root_folder):
        print(f"Input folder not found: {root_folder}")
        return

    # Remove empty CSVs to force header on first write
    if os.path.exists(csv_filepath) and os.path.getsize(csv_filepath) == 0:
        os.remove(csv_filepath)
        print("Removed empty CSV so headers can be written fresh.")

    print("\n Scanning folders...\n")

    for dirpath, dirnames, filenames in os.walk(root_folder):
        # Sorting in place steers os.walk's descent, so folders (and the
        # files below) are visited in a reproducible order and repeated runs
        # produce the same CSV row order.
        dirnames.sort()

        # Skip the root — we want subfolders as labels
        if dirpath == root_folder:
            continue

        folder_name = os.path.basename(dirpath)
        print(f"\n Folder detected: {folder_name}")

        for filename in sorted(filenames):
            if filename.lower().endswith(".json"):
                json_path = os.path.join(dirpath, filename)
                extract_to_csv(
                    json_path,
                    csv_filepath,
                    label_value=folder_name,
                    window_seconds=window_seconds,
                )

if __name__ == "__main__":
    # Entry point: read the recordings stored on the mounted Drive and write
    # the aggregated rows to a single CSV next to them.
    process_files_in_subfolders(
        "/content/drive/MyDrive/data before classification",
        "/content/drive/MyDrive/mmwave_aggregated.csv",
    )

    print("\n DONE! CSV CREATED WITH FULL HEADERS.\n")



 Scanning folders...


 Folder detected: walk and stop
 Processed /content/drive/MyDrive/data before classification/walk and stop/replay_9.json → 11 aggregated rows
 Processed /content/drive/MyDrive/data before classification/walk and stop/replay_3.json → 11 aggregated rows
 Processed /content/drive/MyDrive/data before classification/walk and stop/replay_1.json → 11 aggregated rows
 Processed /content/drive/MyDrive/data before classification/walk and stop/replay_4.json → 11 aggregated rows
 Processed /content/drive/MyDrive/data before classification/walk and stop/replay_11.json → 11 aggregated rows
 Processed /content/drive/MyDrive/data before classification/walk and stop/replay_2.json → 11 aggregated rows
 Processed /content/drive/MyDrive/data before classification/walk and stop/replay_10.json → 11 aggregated rows
 Processed /content/drive/MyDrive/data before classification/walk and stop/replay_7.json → 11 aggregated rows
 Processed /content/drive/MyDrive/data before classification/w