In [None]:
import scipy.io
import mat73
import numpy as np
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
from xgboost import XGBClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# === Parameters ===
window_size = 1000
step_size = 500
emg_key = 'emg'
restimulus_key = 'restimulus'
rerepetition_key = 'rerepetition'

# === Feature Extraction Function ===
def extract_features(emg_segment):
    rms = np.sqrt(np.mean(emg_segment**2, axis=0))
    mav = np.mean(np.abs(emg_segment), axis=0)
    wl = np.sum(np.abs(np.diff(emg_segment, axis=0)), axis=0)
    fft = np.abs(np.fft.rfft(emg_segment, axis=0))
    mean_freq = np.mean(fft, axis=0)
    median_freq = np.median(fft, axis=0)
    return np.concatenate([rms, mav, wl, mean_freq, median_freq])

# === Robust Loader (mat73 or scipy fallback) ===
def safe_loadmat(mat_path):
    try:
        return mat73.loadmat(mat_path)
    except:
        try:
            return scipy.io.loadmat(mat_path, variable_names=[emg_key, restimulus_key, rerepetition_key])
        except Exception as e:
            print(f"❌ Skipped {mat_path}: {e}")
            return None

# === Load All .mat Files ===
X_all, y_all = [], []
mat_files = glob(r"C:\Users\kulwant dhillon\Downloads\DB2_s1\DB2_s1\*.mat")

for mat_path in mat_files:
    mat = safe_loadmat(mat_path)
    if mat is None:
        continue

    try:
        emg = mat[emg_key]
        restimulus = np.ravel(mat[restimulus_key])
        rerepetition = np.ravel(mat[rerepetition_key])
    except Exception as e:
        print(f"❌ Missing data in {mat_path}: {e}")
        continue

    # Compute rep-level RMS & MDF
    rep_stats = {}
    for gesture in np.unique(restimulus):
        if gesture == 0:
            continue
        for rep in [1, 5]:
            indices = np.where((restimulus == gesture) & (rerepetition == rep))[0]
            if len(indices) < window_size:
                continue
            segment = emg[indices[:window_size], :]
            rms = np.sqrt(np.mean(segment**2))
            fft = np.abs(np.fft.rfft(segment, axis=0))
            psd = np.mean(fft**2, axis=1)
            freqs = np.fft.rfftfreq(segment.shape[0], d=1/1000)
            cumulative_power = np.cumsum(psd)
            total_power = cumulative_power[-1]
            mdf = freqs[np.where(cumulative_power >= total_power / 2)[0][0]]
            rep_stats[(gesture, rep)] = {'rms': rms, 'mdf': mdf}

    # Windowing + Dynamic Fatigue Labeling
    for gesture in np.unique(restimulus):
        if gesture == 0:
            continue

        stats_1 = rep_stats.get((gesture, 1))
        stats_5 = rep_stats.get((gesture, 5))
        label_rep5 = 0
        if stats_1 and stats_5:
            rms_change = (stats_5['rms'] - stats_1['rms']) / stats_1['rms']
            mdf_change = (stats_5['mdf'] - stats_1['mdf']) / stats_1['mdf']
            if rms_change > 0.15 and mdf_change < -0.10:
                label_rep5 = 1

        for rep in [1, 5]:
            indices = np.where((restimulus == gesture) & (rerepetition == rep))[0]
            if len(indices) < window_size:
                continue
            for start in range(0, len(indices) - window_size, step_size):
                segment = emg[indices[start:start + window_size], :]
                if segment.shape[0] < window_size:
                    continue
                features = extract_features(segment)
                X_all.append(features)
                y_all.append(0 if rep == 1 else label_rep5)

X = np.array(X_all, dtype=np.float32)
y = np.array(y_all)
print(f"📊 Dataset shape: {X.shape}, Labels: {np.bincount(y)}")

# === Balance Classes ===
X_f0 = X[y == 0]
X_f1 = X[y == 1]
X_f1_down, y_f1_down = resample(
    X_f1, y[y == 1], 
    replace=True,   # <- fix here
    n_samples=len(X_f0), 
    random_state=42
)

X_bal = np.vstack([X_f0, X_f1_down])
y_bal = np.concatenate([np.zeros(len(X_f0)), np.ones(len(X_f0))])

# === Train/Test Split ===
X_train, X_test, y_train, y_test = train_test_split(X_bal, y_bal, stratify=y_bal, test_size=0.2, random_state=42)

# === Normalize + PCA ===
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
pca = PCA(n_components=20)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# === Train Classifier ===
clf = XGBClassifier(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.03,
    subsample=0.9,
    colsample_bytree=0.9,
    eval_metric='logloss',
    use_label_encoder=False,
    random_state=42
)
clf.fit(X_train, y_train)

# === Evaluate ===
y_pred = clf.predict(X_test)
y_prob = clf.predict_proba(X_test)[:, 1]

conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
roc_score = roc_auc_score(y_test, y_prob)

(conf_matrix, class_report, roc_score)


📊 Dataset shape: (17585, 60), Labels: [16690   895]


(array([[3110,  228],
        [  51, 3287]], dtype=int64),
 '              precision    recall  f1-score   support\n\n         0.0       0.98      0.93      0.96      3338\n         1.0       0.94      0.98      0.96      3338\n\n    accuracy                           0.96      6676\n   macro avg       0.96      0.96      0.96      6676\nweighted avg       0.96      0.96      0.96      6676\n',
 0.9929140844519291)

In [6]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

y_true = y_test
y_pred = clf.predict(X_test)
y_proba = clf.predict_proba(X_test)[:, 1]  # Probability for positive class

# Assuming y_true and y_pred are defined, and y_proba for ROC
conf_matrix = confusion_matrix(y_true, y_pred)
report = classification_report(y_true, y_pred)
roc = roc_auc_score(y_true, y_proba)

print("📊 Confusion Matrix:")
print(conf_matrix)

print("\n📝 Classification Report:")
print(report)

print(f"\n🎯 ROC-AUC Score: {roc:.4f}")


📊 Confusion Matrix:
[[3110  228]
 [  51 3287]]

📝 Classification Report:
              precision    recall  f1-score   support

         0.0       0.98      0.93      0.96      3338
         1.0       0.94      0.98      0.96      3338

    accuracy                           0.96      6676
   macro avg       0.96      0.96      0.96      6676
weighted avg       0.96      0.96      0.96      6676


🎯 ROC-AUC Score: 0.9929
