In [None]:
! pip install pandas numpy tensorflow scikit-learn


In [2]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

In [3]:
# ==========================================
# 1. Configuration
# ==========================================
# Data Settings
DATA_PATH = '/Users/mostafazamaniturk/Documents/USD/AAI-530/work/edaFolder/aligned_wrist_data_64Hz.csv'
if not os.path.exists(DATA_PATH):
    DATA_PATH = os.path.join(os.getcwd(), 'aligned_wrist_data_64Hz.csv')

WINDOW_SIZE_SEC = 60       # 60-second windows
SAMPLING_RATE = 64         # 64 Hz
WINDOW_LENGTH = WINDOW_SIZE_SEC * SAMPLING_RATE  # 3840 samples
STRIDE = WINDOW_LENGTH // 2  # 50% Overlap

# Classification: 'binary' = stress vs non-stress (0,1), '3class' = baseline/stress/amusement (0,1,2)
N_CLASSES = 2  # 2 = binary, 3 = 3-class

# Training Settings
BATCH_SIZE = 32
EPOCHS = 10
LEARNING_RATE = 0.001

# The 11/2/2 Split (Hardcoded for reproducibility)
TRAIN_SUBJECTS = ['S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10', 'S11', 'S13']
VAL_SUBJECTS   = ['S14', 'S15']
TEST_SUBJECTS  = ['S16', 'S17']

In [4]:
# ==========================================
# 2. Data Loading & Labeling
# ==========================================
def load_and_label_data(filepath):
    """
    Loads aligned BVP/EDA data. Uses real labels if 'label' column exists
    (0=baseline, 1=stress, 2=amusement, -1=other). Otherwise generates fallback labels.
    """
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"Data file not found: {filepath}\n(Unzip aligned_wrist_data_64Hz.csv.zip if needed)")
    print("Loading data...")
    df = pd.read_csv(filepath)

    if 'label' in df.columns:
        print("Using real labels from CSV. Distribution:")
        print(df['label'].value_counts().sort_index())
        # Ensure int type
        df['label'] = df['label'].astype(int)
    else:
        print("No 'label' column found. Using fallback (first half=baseline, second half=stress)...")
        df['label'] = 0
        for sub in df['subject'].unique():
            sub_mask = df['subject'] == sub
            max_time = df.loc[sub_mask, 'time_sec'].max()
            df.loc[sub_mask & (df['time_sec'] > max_time / 2), 'label'] = 1

    return df


In [5]:
# ==========================================
# 3. Preprocessing (Normalization & Windowing)
# ==========================================
def _window_label(window_labels, n_classes):
    """
    Majority vote for window. For 2-class: 0,2->0 (non-stress), 1->1 (stress).
    Skip windows where >50% are -1 (other/transition).
    """
    valid = window_labels[window_labels >= 0]  # exclude -1
    if len(valid) < len(window_labels) * 0.5:  # skip if too many -1
        return None
    if n_classes == 2:
        # Map 2 (amusement) -> 0 (non-stress)
        valid = np.where(valid == 2, 0, valid)
        valid = valid[valid <= 1]  # keep only 0,1
        if len(valid) == 0:
            return None
    counts = np.bincount(valid.astype(int), minlength=n_classes)
    return int(np.argmax(counts))

def create_windows(df, subjects, n_classes=2):
    """
    Slices data into 60-second windows. Normalizes per subject (Z-Score).
    For 2-class: 0=non-stress (baseline+amusement), 1=stress. Skips windows with majority -1.
    """
    X_windows, y_windows = [], []

    for sub in subjects:
        sub_df = df[df['subject'] == sub].copy()
        if sub_df.empty:
            continue

        scaler = StandardScaler()
        sub_df[['bvp', 'eda']] = scaler.fit_transform(sub_df[['bvp', 'eda']])
        data_arr = sub_df[['bvp', 'eda']].values
        label_arr = sub_df['label'].values
        num_samples = len(data_arr)

        for start in range(0, num_samples - WINDOW_LENGTH, STRIDE):
            end = start + WINDOW_LENGTH
            window_data = data_arr[start:end]
            window_labels = label_arr[start:end]
            lbl = _window_label(window_labels, n_classes)
            if lbl is not None:
                X_windows.append(window_data)
                y_windows.append(lbl)

    return np.array(X_windows), np.array(y_windows)

In [6]:
# ==========================================
# 4. Model Architecture (1D-CNN)
# ==========================================
def build_1d_cnn(input_shape, n_classes=2):
    """Lightweight 1D-CNN for BVP/EDA. n_classes=2 (stress vs non-stress) or 3 (baseline/stress/amusement)."""
    inputs = layers.Input(shape=input_shape)

    x = layers.Conv1D(filters=32, kernel_size=64, activation='relu', padding='same')(inputs)
    x = layers.MaxPooling1D(pool_size=4)(x)
    x = layers.Dropout(0.2)(x)

    x = layers.Conv1D(filters=64, kernel_size=32, activation='relu', padding='same')(x)
    x = layers.MaxPooling1D(pool_size=4)(x)
    x = layers.Dropout(0.2)(x)

    x = layers.Conv1D(filters=128, kernel_size=16, activation='relu', padding='same')(x)
    x = layers.GlobalAveragePooling1D()(x)

    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(n_classes, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=optimizers.Adam(learning_rate=LEARNING_RATE),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [7]:
# ==========================================
# 5. Main Execution Pipeline
# ==========================================
# A. Load Data
df = load_and_label_data(DATA_PATH)

# B. Create Datasets (Train / Val / Test)
print(f"\nCreating windows ({WINDOW_SIZE_SEC}s, {N_CLASSES}-class)...")
X_train, y_train = create_windows(df, TRAIN_SUBJECTS, n_classes=N_CLASSES)
X_val, y_val     = create_windows(df, VAL_SUBJECTS, n_classes=N_CLASSES)
X_test, y_test   = create_windows(df, TEST_SUBJECTS, n_classes=N_CLASSES)

print(f"Train Shape: {X_train.shape} (Subjects: {len(TRAIN_SUBJECTS)})")
print(f"Val Shape:   {X_val.shape}   (Subjects: {len(VAL_SUBJECTS)})")
print(f"Test Shape:  {X_test.shape}  (Subjects: {len(TEST_SUBJECTS)})")
if len(X_train) > 0:
    print(f"Train class distribution: {np.bincount(y_train.astype(int), minlength=N_CLASSES)}")

# C. Build & Train Model
input_shape = (WINDOW_LENGTH, 2)  # [3840, 2]
model = build_1d_cnn(input_shape, n_classes=N_CLASSES)
model.summary()

print("\nStarting Training...")
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1
)

# D. Final Evaluation
print("\nEvaluating on Test Set (Unseen Subjects)...")
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

y_pred = np.argmax(model.predict(X_test, verbose=0), axis=1)
class_names = ['non-stress', 'stress'] if N_CLASSES == 2 else ['baseline', 'stress', 'amusement']
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=class_names))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Save the model
save_path = os.path.join(os.path.dirname(DATA_PATH), 'wesad_1d_cnn.h5')
model.save(save_path)
print(f"\nModel saved to {save_path}")

Loading data...
Using real labels from CSV. Distribution:
label
-1    509467
 0     19120
 1     10925
 2      6169
Name: count, dtype: int64

Creating windows (60s, 2-class)...
Train Shape: (8, 3840, 2) (Subjects: 11)
Val Shape:   (2, 3840, 2)   (Subjects: 2)
Test Shape:  (1, 3840, 2)  (Subjects: 2)
Train class distribution: [8 0]



Starting Training...
Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 1.0000 - loss: 0.5388 - val_accuracy: 1.0000 - val_loss: 0.1343
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step - accuracy: 1.0000 - loss: 0.1040 - val_accuracy: 1.0000 - val_loss: 0.0046
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 185ms/step - accuracy: 1.0000 - loss: 0.0256 - val_accuracy: 1.0000 - val_loss: 2.1815e-05
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 216ms/step - accuracy: 1.0000 - loss: 0.0012 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step - accuracy: 1.0000 - loss: 8.8884e-05 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step - accuracy: 1.0000 - loss: 1.5288e-04 - val_accuracy: 1.0000 - val_loss: 0.0000e+

ValueError: Number of classes, 1, does not match size of target_names, 2. Try specifying the labels parameter