<a href="https://colab.research.google.com/github/Reem-Aboutaleb/multimodal-stress-detection/blob/main/05_Model_Comparison.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np
from scipy.signal import find_peaks, welch

# Simulate signals
n_samples = 5000
t = np.linspace(0, 10, n_samples)
ppg = 0.6 * np.sin(2 * np.pi * 1.2 * t) + 0.05 * np.random.randn(n_samples)
eda = 0.05 * np.random.randn(n_samples) + np.interp(t, [0, 10], [0.2, 0.5]) + 0.05 * (np.random.rand(n_samples) > 0.98)

# HRV features
peaks, _ = find_peaks(ppg, distance=50)
ibi = np.diff(peaks)

def pnn50(ibi):
    diff_ibi = np.abs(np.diff(ibi))
    return 100.0 * np.sum(diff_ibi > 50) / len(diff_ibi)

def frequency_domain_features(ibi, fs=4):
    if len(ibi) < 2:
        return {'HRV_LF': 0, 'HRV_HF': 0, 'LF_HF_ratio': 0}
    fxx, pxx = welch(ibi, fs=fs)
    lf_band = (fxx >= 0.04) & (fxx <= 0.15)
    hf_band = (fxx >= 0.15) & (fxx <= 0.4)
    lf = np.trapz(pxx[lf_band], fxx[lf_band])
    hf = np.trapz(pxx[hf_band], fxx[hf_band])
    return {
        'HRV_LF': lf,
        'HRV_HF': hf,
        'LF_HF_ratio': lf / hf if hf != 0 else 0
    }

hrv_features = {
    'ppg_ibi_mean': np.mean(ibi),
    'ppg_ibi_sdnn': np.std(ibi),
    'ppg_ibi_rmssd': np.sqrt(np.mean(np.square(np.diff(ibi)))),
    'pnn50': pnn50(ibi)
}
hrv_features.update(frequency_domain_features(ibi))

# EDA features
eda_mean = np.mean(eda)
eda_std = np.std(eda)
eda_max = np.max(eda)
eda_min = np.min(eda)
eda_peaks, properties = find_peaks(eda, distance=50, prominence=0.02)
scr_count = len(eda_peaks)
scr_mean_amp = np.mean(properties["prominences"]) if scr_count > 0 else 0

# Combine
features = {
    **hrv_features,
    'eda_mean': eda_mean,
    'eda_std': eda_std,
    'eda_max': eda_max,
    'eda_min': eda_min,
    'eda_scr_count': scr_count,
    'eda_scr_mean_amp': scr_mean_amp
}

features_df = pd.DataFrame([features])
features_df.to_csv("extracted_features_advanced.csv", index=False)
print("✅ Saved extracted_features_advanced.csv")

import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score

# Load the advanced feature CSV
df = pd.read_csv('extracted_features_advanced.csv')

# Simulate 50 slightly varied rows
rows = []
for _ in range(50):
    new_row = df.iloc[0].copy()
    noise = np.random.normal(0, 0.1, len(new_row))
    new_row += noise
    rows.append(new_row)

df = pd.DataFrame(rows)
df['label'] = [0]*17 + [1]*17 + [2]*16

# Split data
X = df.drop(columns=['label'])
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
results = []

models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'SVM (RBF Kernel)': SVC(kernel='rbf'),
    'K-Nearest Neighbors': KNeighborsClassifier(n_neighbors=5)
}

for name, model in models.items():
    start = time.time()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    end = time.time()
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    results.append([name, acc, f1, end - start])
results_df = pd.DataFrame(results, columns=['Model', 'Accuracy', 'F1 Score', 'Training Time (s)'])
results_df.sort_values(by='Accuracy', ascending=False, inplace=True)
results_df.reset_index(drop=True, inplace=True)
results_df


  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  lf = np.trapz(pxx[lf_band], fxx[lf_band])
  hf = np.trapz(pxx[hf_band], fxx[hf_band])


✅ Saved extracted_features_advanced.csv


Unnamed: 0,Model,Accuracy,F1 Score,Training Time (s)
0,Random Forest,0.466667,0.426407,0.436625
1,K-Nearest Neighbors,0.466667,0.406138,0.023532
2,Logistic Regression,0.333333,0.237037,0.088871
3,SVM (RBF Kernel),0.333333,0.24381,0.010533
