<a href="https://colab.research.google.com/github/FarimaM/Machine-Learning-Course/blob/main/Homework5_Session5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🎯 Mini-Project: DEAP Arousal Prediction

In [None]:
# Mount Google Drive at /content/drive; the dataset path configured in the
# next cell points inside this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Folder holding the DEAP subject files; the loading cell joins file names
# onto this path.
data_path = '/content/drive/MyDrive/DEAP_Dataset'
# Echo the configured location so it is visible in the notebook output.
print("Dataset path set to:", data_path)

Dataset path set to: /content/drive/MyDrive/DEAP_Dataset


## Part 1: Classification Task

DEAP Arousal Prediction with SVM

In [None]:
## Setup: Import Libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, cross_val_predict, KFold
from sklearn.svm import SVC, SVR
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix,
    mean_squared_error, mean_absolute_error, r2_score
)
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
## Load the DEAP dataset (preprocessed EEG data and labels)
import numpy as np
import os

# Accumulators holding one entry per trial, gathered across all subject files.
eeg_data_all = []
labels_all = []

# Loop through all 32 subject files
for subject_id in range(1, 33):
    file_name = os.path.join(data_path, f's{subject_id:02d}.dat')
    with open(file_name, 'rb') as file:
        # SECURITY: allow_pickle=True lets np.load unpickle the file, and
        # unpickling can execute arbitrary code. Only load trusted files.
        subject = np.load(file, allow_pickle=True, encoding='latin1')
        trials = subject['data']      # shape: (40, 40, 8064)
        labels = subject['labels']    # shape: (40, 4)

        # Fail loudly if a file has a different number of trials and label
        # rows; otherwise data and labels would silently fall out of step.
        if len(trials) != len(labels):
            raise ValueError(
                f"{file_name}: {len(trials)} trials but {len(labels)} label rows"
            )

        # Iterating an array yields its first-axis slices, so this appends
        # every trial without assuming there are exactly 40 of them.
        eeg_data_all.extend(trials)
        labels_all.extend(labels)   # each row: [valence, arousal, dominance, liking]

# Convert lists to NumPy arrays. The names are deliberately rebound so the
# per-trial lists are released once the stacked copies exist.
eeg_data_all = np.array(eeg_data_all)       # shape: (1280, 40, 8064)
labels_all = np.array(labels_all)           # shape: (1280, 4)
# Column 1 of the label matrix is the arousal rating (1 to 9); copy it so the
# vector owns its memory instead of being a strided view.
arousal_labels = labels_all[:, 1].copy()    # shape: (1280,)

# Print basic info
print("Loaded EEG data shape:", eeg_data_all.shape)
print("Loaded full label matrix shape:", labels_all.shape)
print("Loaded arousal label vector shape:", arousal_labels.shape)

Loaded EEG data shape: (1280, 40, 8064)
Loaded full label matrix shape: (1280, 4)
Loaded arousal label vector shape: (1280,)


In [None]:
## Convert Arousal Ratings into 3 Classes (categories)
def bin_arousal_3class(rating):
    """Map one arousal rating to a class id.

    Returns 0 (Low) for ratings up to and including 3, 1 (Medium) for ratings
    above 3 up to and including 6, and 2 (High) for everything else.
    """
    if rating <= 3:
        return 0  # Low
    # Past the Low band: only Medium vs. High is left to decide.
    return 1 if rating <= 6 else 2

# Apply to all arousal labels (arousal_labels is already loaded, shape: [1280,]).
# dtype=int keeps the array integer-typed even if there are no labels at all.
y_class = np.array([bin_arousal_3class(r) for r in arousal_labels], dtype=int)

# Show class distribution.
# bincount with minlength reserves one slot per class id (0, 1, 2) even when a
# class has no samples, so each count always lines up with its own name.
class_names = ['Low', 'Medium', 'High']
counts = np.bincount(y_class, minlength=len(class_names))
for label, count in zip(class_names, counts):
    print(f"{label} Arousal: {count} samples")

Low Arousal: 226 samples
Medium Arousal: 530 samples
High Arousal: 524 samples


In [None]:
## Flatten each trial into one feature vector for classification
# eeg_data_all is (1280, 40, 8064). Keeping the first axis and collapsing the
# remaining ones gives (1280, 40 * 8064): one row per trial, one column per feature.
n_trials = len(eeg_data_all)
X_features = eeg_data_all.reshape(n_trials, -1)

print("Reshaped X_features shape:", X_features.shape)

Reshaped X_features shape: (1280, 322560)


In [None]:
## Train SVM Classifiers with Different Kernels
def evaluate_svm_classification(X, y, kernel_type, C=1.0, gamma='scale', degree=3,
                                n_splits=5, random_state=42):
    """Cross-validate an SVC and report macro-averaged classification metrics.

    Parameters
    ----------
    X : feature matrix, one row per sample.
    y : class ids; the report labels ids 0, 1, 2 as Low, Medium, High.
    kernel_type : kernel name forwarded to ``SVC``.
    C, gamma, degree : forwarded to ``SVC`` unchanged.
    n_splits : number of stratified folds.
    random_state : seed for the shuffled fold assignment.

    Returns
    -------
    dict with keys 'accuracy', 'precision', 'recall', 'f1' (floats) and
    'predictions' (the out-of-fold prediction for every sample).

    NOTE(review): X is handed to SVC unscaled. SVMs are scale-sensitive, so
    consider standardising inside each fold if memory allows.
    """
    class_names = ['Low', 'Medium', 'High']
    class_ids = list(range(len(class_names)))

    model = SVC(kernel=kernel_type, C=C, gamma=gamma, degree=degree)
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    # Each sample is predicted by the model trained on the folds that exclude it.
    y_pred = cross_val_predict(model, X, y, cv=skf)

    acc = accuracy_score(y, y_pred)
    # zero_division=0 yields the same values as the default but without the
    # undefined-metric warning when a class is never predicted.
    prec = precision_score(y, y_pred, average='macro', zero_division=0)
    rec = recall_score(y, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y, y_pred, average='macro', zero_division=0)

    print(f"\n🔍 Kernel: {kernel_type.upper()}")
    print(f"Accuracy: {acc:.3f}")
    print(f"Precision: {prec:.3f}")
    print(f"Recall: {rec:.3f}")
    print(f"F1-score: {f1:.3f}")
    print("Classification Report:")
    # Pinning labels keeps the three names attached to ids 0/1/2 even if one
    # class is missing from both y and y_pred.
    print(classification_report(y, y_pred, labels=class_ids,
                                target_names=class_names, zero_division=0))

    return {
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1': f1,
        'predictions': y_pred
    }

In [None]:
## Run Classifiers with Different Kernels
# Evaluate every kernel in turn and keep its metrics keyed by kernel name
kernels = ['linear', 'rbf', 'poly']
results = {}

print("Training SVMs with different kernels...\n")
print("="*60)

for kernel in kernels:
    # degree=3 matches the helper's default, so a single call form covers
    # the polynomial kernel and the others alike.
    results[kernel] = evaluate_svm_classification(
        X_features, y_class, kernel_type=kernel, degree=3
    )
    print("-"*60)

Training SVMs with different kernels...


🔍 Kernel: LINEAR
Accuracy: 0.368
Precision: 0.350
Recall: 0.351
F1-score: 0.347
Classification Report:
              precision    recall  f1-score   support

         Low       0.19      0.28      0.23       226
      Medium       0.43      0.38      0.40       530
        High       0.43      0.40      0.41       524

    accuracy                           0.37      1280
   macro avg       0.35      0.35      0.35      1280
weighted avg       0.39      0.37      0.37      1280

------------------------------------------------------------


In [None]:
## Visualize Accuracy Comparison
# One bar per kernel, annotated with its cross-validated accuracy
kernel_names = [name.upper() for name in kernels]
accuracies = [results[name]['accuracy'] for name in kernels]

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(kernel_names, accuracies, color=['skyblue', 'salmon', 'lightgreen'],
              edgecolor='black', linewidth=2)
ax.set_ylim([0, 1.0])
ax.set_ylabel('Cross-Validated Accuracy', fontsize=12)
ax.set_xlabel('Kernel Type', fontsize=12)
ax.set_title('SVM Performance Comparison Across Kernels', fontsize=14, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

# Add value labels just above the top of each bar, centred horizontally
for bar, acc in zip(bars, accuracies):
    label_x = bar.get_x() + bar.get_width()/2.
    label_y = bar.get_height() + 0.02
    ax.text(label_x, label_y, f'{acc:.3f}',
            ha='center', va='bottom', fontsize=12, fontweight='bold')

fig.tight_layout()
plt.show()

In [None]:
## Confusion Matrix for Best Kernel (e.g. RBF)
def plot_confusion_matrix(y_true, y_pred, title, class_names=('Low', 'Medium', 'High')):
    """Draw an annotated confusion-matrix heatmap.

    Parameters
    ----------
    y_true, y_pred : true and predicted class ids, expected to be
        0 .. len(class_names) - 1.
    title : figure title.
    class_names : display name for each class id, in id order.

    The matrix always has one row and column per entry in ``class_names``.
    A class that never occurs shows up as zeros, so the tick labels stay
    aligned with the matrix.
    """
    class_names = list(class_names)
    class_ids = list(range(len(class_names)))
    # Without explicit labels the matrix only covers ids that actually occur,
    # which would shift the tick names onto the wrong rows and columns.
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    plt.figure(figsize=(6,5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title(title)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.show()

# Plot the kernel that actually scored the highest cross-validated accuracy,
# rather than assuming it is RBF.
best_kernel = max(results, key=lambda name: results[name]['accuracy'])
plot_confusion_matrix(
    y_class,
    results[best_kernel]['predictions'],
    f"Confusion Matrix - SVM {best_kernel.upper()} Kernel",
)

## Part 2: Regression Task

In [None]:
## Train SVR Models with Different Kernels
def train_svr_model(X, y, kernel_type, C=1.0, gamma='scale', degree=3,
                    n_splits=5, random_state=42):
    """Cross-validate an SVR, print RMSE / MAE / R², and return the predictions.

    Parameters
    ----------
    X : feature matrix, one row per sample.
    y : continuous targets.
    kernel_type : kernel name forwarded to ``SVR``.
    C, gamma, degree : forwarded to ``SVR`` unchanged; they mirror the
        options of the classification helper.
    n_splits : number of folds.
    random_state : seed for the shuffled fold assignment.

    Returns
    -------
    The out-of-fold prediction for every sample.
    """
    print(f"\nTraining SVR with {kernel_type} kernel...")
    model = SVR(kernel=kernel_type, C=C, gamma=gamma, degree=degree)
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    # Each sample is predicted by the model trained on the folds that exclude it.
    y_pred = cross_val_predict(model, X, y, cv=kf)

    rmse = np.sqrt(mean_squared_error(y, y_pred))
    mae = mean_absolute_error(y, y_pred)
    r2 = r2_score(y, y_pred)

    print(f"RMSE: {rmse:.3f}")
    print(f"MAE: {mae:.3f}")
    print(f"R² Score: {r2:.3f}")

    return y_pred

In [None]:
# Run SVR models: one cross-validated fit per kernel (linear, rbf, poly)
svr_predictions = {}
for svr_kernel in ('linear', 'rbf', 'poly'):
    svr_predictions[svr_kernel] = train_svr_model(X_features, arousal_labels, svr_kernel)

# Expose each result under its own name for the cells that follow
y_pred_svr_linear = svr_predictions['linear']
y_pred_svr_rbf = svr_predictions['rbf']
y_pred_svr_poly = svr_predictions['poly']

In [None]:
## Scatter Plot for Best SVR (e.g. RBF)
def plot_regression_results(y_true, y_pred, title):
    """Scatter predicted against actual arousal with a dashed y = x guide."""
    fig, ax = plt.subplots(figsize=(6,5))
    ax.scatter(y_true, y_pred, alpha=0.5, color='teal')
    # Dashed diagonal from (1, 1) to (9, 9): points on it are perfect predictions.
    ax.plot([1, 9], [1, 9], linestyle='--', color='gray')
    ax.set_xlabel("Actual Arousal")
    ax.set_ylabel("Predicted Arousal")
    ax.set_title(title)
    ax.grid(True)
    plt.show()

# Compare the RBF-kernel SVR predictions against the recorded arousal ratings
plot_regression_results(
    y_true=arousal_labels,
    y_pred=y_pred_svr_rbf,
    title="SVR RBF Kernel - Predicted vs Actual",
)