In [13]:
from pathlib import Path
import pandas as pd
import numpy as np

IN_DIR  = r"C:\Users\Haiya\Downloads\OneDrive_2025-09-19\patient_data"
OUT_DIR = r"C:\Users\Haiya\Downloads\OneDrive_2025-09-19\patient_data_2features"

LEFT_COL  = "Amplitude left aEEG, Left Hemi (No Fp)"
RIGHT_COL = "Amplitude right aEEG, Right Hemi (No Fp)"

Path(OUT_DIR).mkdir(parents=True, exist_ok=True)

mi = []

def excel_serial_to_dt(s):
    """Convert Excel serial days to datetime (NaT if not numeric)."""
    ser = pd.to_numeric(s, errors="coerce")
    return pd.to_datetime("1899-12-30") + pd.to_timedelta(ser, unit="D")

for f in Path(IN_DIR).glob("*.csv"):
    print(f"\nProcessing: {f.name}")
    if f.name not in mi: continue
    try:
        df = pd.read_csv(f, low_memory=False)
    except UnicodeDecodeError:
        df = pd.read_csv(f, encoding="latin-1", low_memory=False)

    # drop the first row if present
    if 0 in df.index:
        df = df.drop(index=0)
        
    first_col_name = df.columns[0]
    first_col_raw = df.iloc[:, 0]

    # Try normal datetime parsing; where that fails, try Excel serials
    time_parsed = pd.to_datetime(first_col_raw, errors="coerce", infer_datetime_format=True)
    # If most values are still NaT but look like large numbers, try Excel serial:
    if time_parsed.isna().mean() > 0.5:
        excel_try = excel_serial_to_dt(first_col_raw)
        # Use Excel parsing where it succeeded, otherwise keep original parses
        time_parsed = time_parsed.fillna(excel_try)

    # If still all NaT, just keep the raw text (don’t break)
    if time_parsed.isna().all():
        time_out = first_col_raw.astype(str)
    else:
        time_out = time_parsed

    # --- required columns ---
    missing = [c for c in [LEFT_COL, RIGHT_COL, "Time"] if c not in df.columns]
    if missing:
        print("  !! Missing columns:", missing)
        print("  Columns available:", list(df.columns))
        # skip this file but continue with others
        continue

    # numeric coercion
    ts    = pd.to_numeric(df["Time"],    errors="coerce").fillna(0)
    left  = pd.to_numeric(df[LEFT_COL],  errors="coerce").fillna(0)
    right = pd.to_numeric(df[RIGHT_COL], errors="coerce").fillna(0)

    # stable sort by Time and align
    order = ts.sort_values(kind="mergesort").index
    out = pd.DataFrame({
        "time":        time_out.loc[order].reset_index(drop=True),
        "aEEG_left":   left.loc[order].reset_index(drop=True),
        "aEEG_right":  right.loc[order].reset_index(drop=True),
    })

    out.to_csv(Path(OUT_DIR) / f.name, index=False)
    print("  Saved →", (Path(OUT_DIR) / f.name).name)

print("\nDone.")



Processing: ID1-1_822_6.5.19.csv

Processing: ID1-2_792_10.11.19.csv

Processing: ID1-3_822_10.12.19.csv

Processing: ID10-1_582_5.13.21.csv

Processing: ID10-2_582_5.15.21.csv

Processing: ID10-2_795_5.14.21.csv

Processing: ID11-1_706_6.3.21.csv

Processing: ID11-2_793_6.4.21.csv

Processing: ID11-4_706_6.6.21.csv

Processing: ID11-5_706_6.7.21.csv

Processing: ID12-1_848_6.6.21.csv

Processing: ID12-3_800_6.8.21.csv

Processing: ID12-4_848_6.9.21.csv

Processing: ID13-1_444_6.9.21.csv

Processing: ID13-2_792_6.10.21.csv

Processing: ID14-1_511_6.30.21.csv

Processing: ID14-2_787_7.1.21.csv

Processing: ID14-3_511_7.2.21.csv

Processing: ID15-1_718_7.2.21.csv

Processing: ID15-2_608_7.3.21.csv

Processing: ID16-1_590_7.15.21.csv

Processing: ID16-2_794_7.17.21.csv

Processing: ID16-3_590_7.18.21.csv

Processing: ID16-4_590_7.19.21.csv

Processing: ID18-11_583_7.28.21.csv

Processing: ID18-1_796_7.28.21.csv

Processing: ID18-2_583_7.29.24.csv

Processing: ID18-3_583_7.30.21.csv

Proc

  time_parsed = pd.to_datetime(first_col_raw, errors="coerce", infer_datetime_format=True)
  time_parsed = pd.to_datetime(first_col_raw, errors="coerce", infer_datetime_format=True)


  Saved → ID19-2_793_7.31.21.csv

Processing: ID2-1_496_6.4.20.csv

Processing: ID2-2_496_6.5.20.csv

Processing: ID2-3_799_6.6.20.csv

Processing: ID2-4_496_6.7.19.csv

Processing: ID20-1_500_8.6.21.csv

Processing: ID20-2_785_8.7.21.csv

Processing: ID21-1_836_8.6.21.csv

Processing: ID21-2_792_8.7.21.csv

Processing: ID22-1_488_8.11.21.csv

Processing: ID22-2_800_8.12.22.csv

Processing: ID23-1_1594_8.14.21.csv

Processing: ID24-1_618_8.22.21.csv

Processing: ID24-2_732_8.23.21.csv

Processing: ID25-1_694_9.9.21.csv

Processing: ID25-2_694_9.10.21.csv

Processing: ID25-3_799_9.11.21.csv

Processing: ID25-4_694_9.12.21.csv

Processing: ID26-1_393_9.28.21.csv

Processing: ID26-2_795_9.29.21.csv

Processing: ID26-3_393_9.30.21.csv

Processing: ID27-1_834_9.29.21.csv

Processing: ID27-2_795_9.30.21.csv

Processing: ID27-3_834_10.1.21.csv

Processing: ID27-4_834_10.2.21.csv

Processing: ID28-1_386_10.09.21.csv

Processing: ID3-1_492_9.2.20.csv

Processing: ID3-2_492_9.3.20.csv

Processin