In [165]:
import json, numpy as np, pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing  import StandardScaler
from sklearn.impute         import SimpleImputer
from sklearn.svm            import SVC
from sklearn.pipeline       import Pipeline
from sklearn.metrics        import classification_report

# ───────── config ────────────────────────────────────────────────────────
CSV_DIR   = Path("../output/files")          # << adjust if needed
WIN       = 7                               # trailing window (0…-10)
SHIFT     = WIN - 1
TIPS      = ["ThumbTip","IndexTip","MiddleTip","RingTip","LittleTip"]
KEEP_NEG  = 4                                # keep at most 4× positives
PARAMS    = {"svc__C":[1,2], "svc__gamma":[0.05,0.1]}  # small grid

# ───────── load & merge CSVs ─────────────────────────────────────────────
files = sorted(CSV_DIR.glob("finger_snap_*.csv"))
if not files:
    raise SystemExit(f"No CSVs in {CSV_DIR}")

df = pd.concat(map(pd.read_csv, files), ignore_index=True)
df["y"] = (df["Label"].fillna("None") != "None").astype(int)

needed = ["Dist","Speed","PalmDist"] + [f"{t}.{a}" for t in TIPS for a in "xyz"]
df = df.dropna(subset=needed).reset_index(drop=True)

# ───────── rolling statistics (min / mean / max) ─────────────────────────
roll = df[["Dist","Speed","PalmDist"]].rolling(WIN, min_periods=WIN)

df_feat = pd.DataFrame({
    "dMin" : roll["Dist"].min(),
    "dMean": roll["Dist"].mean(),
    "sMax" : roll["Speed"].max(),
    "sMean": roll["Speed"].mean(),
    "pMin" : roll["PalmDist"].min(),
    "pMean": roll["PalmDist"].mean()
})

# ───────── ΔXYZ & mean-velocities (vectorised shift) ─────────────────────
dt = WIN / 60.0                              # seconds in window
for tip in TIPS:
    for ax in "xyz":
        col = f"{tip}.{ax}"
        # Ensure the column is numeric before subtraction
        df[col] = pd.to_numeric(df[col], errors='coerce')
        delta = df[col] - df[col].shift(SHIFT)
        df_feat[f"{tip[0]}_{ax}_delta"] = delta 

# ───────── final feature DF, drop NaNs (first SHIFT rows) ────────────────
df_feat["y"] = df["y"]
df_feat = df_feat.dropna().reset_index(drop=True)

# ───────── optional negative subsample  (speeds-up training) ─────────────
pos = df_feat[df_feat.y == 1]
neg = df_feat[df_feat.y == 0]
neg_keep = min(len(neg), KEEP_NEG * len(pos))
neg = neg.sample(neg_keep, random_state=0) if neg_keep else neg
df_feat = pd.concat([pos, neg], ignore_index=True).sample(frac=1, random_state=0)

print("Feature dim =", df_feat.shape[1]-1,
      "| positives:", len(pos), "negatives:", len(neg))

# ───────── split & train SVM ─────────────────────────────────────────────
X, y = df_feat.drop(columns="y").values, df_feat["y"].values
Xtr, Xts, ytr, yts = train_test_split(X, y, test_size=.25,
                                      stratify=y, random_state=42)

pipe = Pipeline([
    ("imp", SimpleImputer(strategy="mean")),
    ("sc" , StandardScaler()),
    ("svc", SVC(kernel="rbf",
                class_weight={0:1, 1:3},  # adjust for class imbalance
                probability=False,
                cache_size=2048))
])

clf = GridSearchCV(pipe, PARAMS, cv=4, n_jobs=-1, verbose=0)
clf.fit(Xtr, ytr)

print("Best params:", clf.best_params_)
print(classification_report(yts, clf.predict(Xts), digits=4))

# ───────── extract parts & export flat JSON (unchanged schema) ───────────
best   = clf.best_estimator_
scaler = best.named_steps["sc"]
svm    = best.named_steps["svc"]

model = dict(
    mean      = scaler.mean_.astype("float32").tolist(),
    scale     = scaler.scale_.astype("float32").tolist(),
    svFlat    = svm.support_vectors_.astype("float32").ravel().tolist(),
    nSV       = int(svm.support_vectors_.shape[0]),
    featDim   = int(svm.support_vectors_.shape[1]),
    dualCoef  = svm.dual_coef_.astype("float32").ravel().tolist(),
    intercept = float(svm.intercept_[0]),
    gamma     = float(svm._gamma)
)

out = Path("../Assets/StreamingAssets"); out.mkdir(parents=True, exist_ok=True)
with (out / "snap_svm.json").open("w") as fp:
    json.dump(model, fp, separators=(",",":"))
print(f"\n→ {out/'snap_svm.json'}  (dim {model['featDim']}  | SV {model['nSV']})")


Feature dim = 21 | positives: 414 negatives: 1656
Best params: {'svc__C': 2, 'svc__gamma': 0.05}
              precision    recall  f1-score   support

           0     0.9728    0.9493    0.9609       414
           1     0.8158    0.8942    0.8532       104

    accuracy                         0.9382       518
   macro avg     0.8943    0.9218    0.9070       518
weighted avg     0.9413    0.9382    0.9393       518


→ ../Assets/StreamingAssets/snap_svm.json  (dim 21  | SV 438)


In [166]:
3

3