In [19]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init

import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader


In [20]:
import os

target_path = "/users/lakaiden/cpsy1291_eeg_clf/data/AlzheimerEEG_data/Alzheimer-s-Classification-EEG/data"
os.chdir(target_path)
print("Notebook working directory set to:", os.getcwd())
print("Files in directory:", os.listdir("."))


Notebook working directory set to: /oscar/home/lakaiden/cpsy1291_eeg_clf/data/AlzheimerEEG_data/Alzheimer-s-Classification-EEG/data
Files in directory: ['ADvsHCFourier.csv', 'CASEVsHCFourier.csv', 'MCIvsADFourier.csv', 'MCIvsHCFourier.csv', 'emp']


In [21]:
df = pd.read_csv("ADvsHCFourier.csv")
#dataset is explicity FFT magnitude 

df['class'] = df['class'].astype(str).str.strip().str.upper() #make sure class labels are normalizes


df['label'] = df['class'].map({'AD': 1, 'CONTROL': 0})#mapping AD to 1 ands CONTROL to 0

print("Before filtering:", df['label'].isna().sum(), "NaNs")

# Remove MCI rows(two classses)
df = df.dropna(subset=['label'])

y = df['label'].astype(int).values

# Odrop experiment class and label columns
X = df.drop(columns=['experiment', 'class', 'label']).values

# Reshape for EEGNet, 19 electrodes, chunk+size of 16
X = X.reshape(len(X), 1, 19, 16)
scaler = StandardScaler()


Before filtering: 0 NaNs


In [25]:
class EEGNet(nn.Module):
    def __init__(self):
        super(EEGNet, self).__init__()
        self.temporal = nn.Conv2d(1, 16, kernel_size=(1,5), padding=(0,2), bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.depthwise = nn.Conv2d(16, 32, kernel_size=(19,1), groups=16, bias=False)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool1 = nn.AvgPool2d((1,4))
        self.drop1 = nn.Dropout(0.6)
        self.separable = nn.Conv2d(32, 32, kernel_size=(1,15), padding=(0,7), bias=False)
        self.bn3 = nn.BatchNorm2d(32)
        self.pool2 = nn.AvgPool2d((1,2))
        self.drop2 = nn.Dropout(0.6)
        self.fc = nn.Linear(32 * 1 * 2, 1)

    def forward(self, x):
        x = torch.relu(self.bn1(self.temporal(x)))
        x = torch.relu(self.bn2(self.depthwise(x)))
        x = self.pool1(x)
        x = self.drop1(x)
        x = torch.relu(self.bn3(self.separable(x)))
        x = self.pool2(x)
        x = self.drop2(x)
        x = x.view(x.size(0), -1)
        return torch.sigmoid(self.fc(x))

if torch.cuda.is_available():
  device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

skf = StratifiedKFold(n_splits=3, shuffle=True)

Using cpu device


In [26]:
accs= [] #list to store performance metrics for each fold  
aucs=  [] #

for fold, (train_idx, test_idx) in enumerate(skf.split(X, y)): # lloops over each fold in stratified kfold cross validation
    print(f"Fold {fold+1}")
    #splits data based on fold indices
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Flatten data for Standard Scaler
    X_train_flat = X_train.reshape(len(X_train), -1)
    X_test_flat = X_test.reshape(len(X_test), -1)
    #Fit standard scaler
    X_train_flat = scaler.fit_transform(X_train_flat)
    X_test_flat = scaler.transform(X_test_flat)

    #EEGNET input shape transformatoin
    X_train = X_train_flat.reshape(len(X_train), 1, 19, 16)
    X_test = X_test_flat.reshape(len(X_test), 1, 19, 16)

    train_data = TensorDataset(torch.tensor(X_train, dtype=torch.float32),
                               torch.tensor(y_train, dtype=torch.float32))
    test_data = TensorDataset(torch.tensor(X_test, dtype=torch.float32),
                              torch.tensor(y_test, dtype=torch.float32))

    train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=16)

    #initialize EEGNET
    model = EEGNet().to(device)
    criterion = nn.BCELoss() #Cross entorpy loss of rbinary classification 

    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

    # Training th emodel 
    for epoch in range(50):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device).unsqueeze(1)
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()

    # Evaluate the model 
    model.eval()
    preds, true = [], []
    with torch.no_grad():
        for xb, yb in test_loader:
            xb = xb.to(device)
            p = model(xb).cpu().numpy()
            preds.extend(p.flatten())
            true.extend(yb.numpy())

    preds_bin = (np.array(preds) > 0.5).astype(int)#probabiblities ot class predictions
    acc = accuracy_score(true, preds_bin)
    accs.append(acc)

    


print(f"\nMean Accuracy: {np.mean(accs):.3f} ± {np.std(accs):.3f}")
print(f"Mean ROC-AUC: {np.mean(aucs):.3f} ± {np.std(aucs):.3f}")

Fold 1
Fold 2
Fold 3

Mean Accuracy: 0.722 ± 0.086
Mean ROC-AUC: nan ± nan


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)


In [24]:
print(df['class'].value_counts(dropna=False))


AD         49
CONTROL    23
Name: class, dtype: int64
