In [None]:
# # Asymmetry-Based Model — Classical ML for Gait Detection

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GroupKFold
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# ## Load Dataset
# The CSV is resolved relative to the kernel's working directory.
DATA_PATH = "detection_asymmetry.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# ## Filter out undefined label = 2
# Keep only rows whose 'label_lenient' differs from 2; .copy() detaches the
# result from the unfiltered frame so later writes cannot alias it.
is_defined = df['label_lenient'].ne(2)
df = df.loc[is_defined].copy()

In [None]:
# ## Feature and Label Selection
# X holds the two stride-time columns, y the 'label_lenient' target, and
# groups the 'patient_id' of each row (passed to the CV splitter later on).
features = [
    'gyro-asymmetry-stride-times',
    'gyro-symmetry-ratio-stride-times',
]
X = df.loc[:, features].values
y = df.loc[:, 'label_lenient'].values
groups = df.loc[:, 'patient_id'].values

In [None]:
# ## Define Models
# Candidate classifiers, keyed by the display name used in reports and plots.
# Every estimator with a stochastic component shares one seed so that the
# cross-validated metrics and the later re-fit of the best model are
# reproducible from a fresh kernel.
RANDOM_STATE = 42

models = {
    "LogisticRegression": LogisticRegression(max_iter=1000),
    "RandomForest": RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
    "GradientBoosting": GradientBoostingClassifier(random_state=RANDOM_STATE),
    # probability=True fits an internal cross-validated calibration that
    # shuffles the data, hence the seed here as well.
    "SVC (RBF Kernel)": SVC(kernel='rbf', probability=True, random_state=RANDOM_STATE)
}

In [None]:
# ## Evaluate Using GroupKFold
# One splitter is shared by every model; `results` maps a model name to its
# classification report in dict form.
N_SPLITS = 5
gkf = GroupKFold(n_splits=N_SPLITS)
results = dict()

In [None]:
# Cross-validate every candidate model in a single cell. The fold loop and the
# report must both live inside the per-model loop: split across cells, only
# the last model would be fitted and the indented report lines would not parse.
for name, model in models.items():
    print(f"\nTraining model: {name}")
    y_true_all, y_pred_all = [], []

    # Splits are grouped by `groups`, so rows sharing a group value never sit
    # on both the train and the test side of the same fold. Out-of-fold
    # predictions are pooled so one report covers all held-out rows.
    for train_idx, test_idx in gkf.split(X, y, groups):
        model.fit(X[train_idx], y[train_idx])
        y_pred = model.predict(X[test_idx])
        y_true_all.extend(y[test_idx])
        y_pred_all.extend(y_pred)

    # Dict form is stored for model selection; text form is printed for reading.
    report = classification_report(y_true_all, y_pred_all, digits=3, output_dict=True)
    results[name] = report
    print(classification_report(y_true_all, y_pred_all, digits=3))

In [None]:
# ## Visualize Confusion Matrix for Best Model
# Pick the model name whose report has the highest F1-score under the "1" key;
# on a tie the first model in insertion order wins.
best_model = max(results, key=lambda model_name: results[model_name]["1"]["f1-score"])
print(f"\nBest model based on F1-score for class 1: {best_model}")

In [None]:
# Re-run the grouped cross-validation for the selected model to collect its
# pooled out-of-fold predictions. The accumulators are reset in the same cell
# as the loop that fills them, so re-running this cell never double-counts
# samples in the confusion matrix.
model = models[best_model]
y_true_all, y_pred_all = [], []

for train_idx, test_idx in gkf.split(X, y, groups):
    model.fit(X[train_idx], y[train_idx])
    y_pred = model.predict(X[test_idx])
    y_true_all.extend(y[test_idx])
    y_pred_all.extend(y_pred)

In [None]:
# Heatmap of the pooled out-of-fold confusion matrix for the selected model.
# Rows are true labels and columns are predicted labels.
class_names = ["Symmetric", "Asymmetric"]
cm = confusion_matrix(y_true_all, y_pred_all)

fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title(f"Confusion Matrix — {best_model}")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
plt.show()