In [1]:
import numpy as np
import pandas as pd
import os
import sys
import random

from pathlib import Path
from sklearn.model_selection import GroupKFold

import tensorflow as tf
from tensorflow.keras.metrics import AUC
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, GRU, Input, BatchNormalization, Dropout

2025-02-13 01:59:25.127125: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-13 01:59:25.132018: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-13 01:59:25.141763: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1739401165.158333 1004981 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1739401165.162306 1004981 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-13 01:59:25.180129: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [2]:
NUM_EPOCHS = 200

ID = ["ID"]
IDS = ["SubjectID", "VideoID"]
TARGET = ["predefinedlabel"]

FEATURES = [
    "Raw",
    "Delta",
    "Theta",
    "Alpha1",
    "Alpha2",
    "Beta1",
    "Beta2",
    "Gamma1",
    "Gamma2",
]

In [3]:
data_dir = Path("/home/aseliverstov/projects/brain_signals/data_confusion")
data = pd.read_csv(data_dir / "EEG_data.csv")

data["ID"] = (len(np.unique(data["VideoID"])) * data["SubjectID"] + data["VideoID"]).astype("int")
data = data[ID + FEATURES + TARGET]

data

Unnamed: 0,ID,Raw,Delta,Theta,Alpha1,Alpha2,Beta1,Beta2,Gamma1,Gamma2,predefinedlabel
0,0,278.0,301963.0,90612.0,33735.0,23991.0,27946.0,45097.0,33228.0,8293.0,0.0
1,0,-50.0,73787.0,28083.0,1439.0,2240.0,2746.0,3687.0,5293.0,2740.0,0.0
2,0,101.0,758353.0,383745.0,201999.0,62107.0,36293.0,130536.0,57243.0,25354.0,0.0
3,0,-5.0,2012240.0,129350.0,61236.0,17084.0,11488.0,62462.0,49960.0,33932.0,0.0
4,0,-8.0,1005145.0,354328.0,37102.0,88881.0,45307.0,99603.0,44790.0,29749.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
12806,99,-39.0,127574.0,9951.0,709.0,21732.0,3872.0,39728.0,2598.0,960.0,1.0
12807,99,-275.0,323061.0,797464.0,153171.0,145805.0,39829.0,571280.0,36574.0,10010.0,1.0
12808,99,-426.0,680989.0,154296.0,40068.0,39122.0,10966.0,26975.0,20427.0,2024.0,1.0
12809,99,-84.0,366269.0,27346.0,11444.0,9932.0,1939.0,3283.0,12323.0,1764.0,1.0


In [4]:
def reshape_dataset(data):
    features = []
    target = []
    for cur_id in np.unique(data[ID].to_numpy()):
        cur_id_data = data[data[ID].to_numpy() == cur_id]
        target.append(np.mean(cur_id_data[TARGET].to_numpy()).astype("int"))
        features.append(cur_id_data[FEATURES].to_numpy())

    features = pad_sequences(features)
    return np.array(features), np.array(target)

def pad_sequences(arrays, pad_value=0):
    max_length = max(arr.shape[0] for arr in arrays)
    padded_arrays = [
        np.pad(
            arr,
            ((0, max_length - arr.shape[0]), (0, 0)),
            mode='constant',
            constant_values=pad_value)
            for arr in arrays
        ]
    return np.stack(padded_arrays)

def create_model(train):
    model = Sequential()
    model.add(Input(shape=(train.shape[1], train.shape[2])))

    model.add(GRU(64, return_sequences=True))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(GRU(32, return_sequences=False))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Dense(16, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=["accuracy", AUC(name="auc")])
    return model


In [5]:
all_histories_acc = []
all_histories_loss = []
all_histories_auc = []

for seed in [1212, 123343, 74432, 64342, 9665]:
    np.random.seed(seed)
    random.seed(seed)
    tf.random.set_seed(seed)

    group_kfold = GroupKFold(n_splits=5)

    for i, (train_index, test_index) in enumerate(group_kfold.split(data[FEATURES], data[TARGET], data[ID])):
        train = data.iloc[train_index]
        test = data.iloc[test_index]

        X_train, y_train = reshape_dataset(train)
        X_test, y_test = reshape_dataset(test)

        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)

        model = create_model(X_train)
        history = model.fit(
            X_train, y_train,
            validation_data=(X_test, y_test),
            epochs=NUM_EPOCHS,
            batch_size=16,
            verbose=0,
        )

        all_histories_acc.append(history.history['val_accuracy'])
        all_histories_loss.append(history.history['val_loss'])
        all_histories_auc.append(history.history['val_auc'])

avg_val_accuracy = np.mean(all_histories_acc, axis=0)
avg_val_loss = np.mean(all_histories_loss, axis=0)
avg_val_auc = np.mean(all_histories_auc, axis=0)

for epoch in range(NUM_EPOCHS):
    print(f"Epoch {epoch + 1}: Val Accuracy = {np.round(avg_val_accuracy[epoch], 2)} AUC = {np.round(avg_val_auc[epoch], 2)} Loss = {np.round(avg_val_loss[epoch], 2)}")

2025-02-13 01:59:27.009383: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 1: Val Accuracy = 0.53 AUC = 0.56 Loss = 0.69
Epoch 2: Val Accuracy = 0.53 AUC = 0.59 Loss = 0.68
Epoch 3: Val Accuracy = 0.54 AUC = 0.6 Loss = 0.68
Epoch 4: Val Accuracy = 0.55 AUC = 0.62 Loss = 0.68
Epoch 5: Val Accuracy = 0.57 AUC = 0.62 Loss = 0.67
Epoch 6: Val Accuracy = 0.57 AUC = 0.64 Loss = 0.67
Epoch 7: Val Accuracy = 0.57 AUC = 0.65 Loss = 0.66
Epoch 8: Val Accuracy = 0.59 AUC = 0.67 Loss = 0.66
Epoch 9: Val Accuracy = 0.61 AUC = 0.67 Loss = 0.65
Epoch 10: Val Accuracy = 0.64 AUC = 0.67 Loss = 0.65
Epoch 11: Val Accuracy = 0.65 AUC = 0.68 Loss = 0.64
Epoch 12: Val Accuracy = 0.66 AUC = 0.7 Loss = 0.64
Epoch 13: Val Accuracy = 0.68 AUC = 0.72 Loss = 0.63
Epoch 14: Val Accuracy = 0.69 AUC = 0.72 Loss = 0.63
Epoch 15: Val Accuracy = 0.7 AUC = 0.73 Loss = 0.62
Epoch 16: Val Accuracy = 0.71 AUC = 0.74 Loss = 0.62
Epoch 17: Val Accuracy = 0.71 AUC = 0.74 Loss = 0.61
Epoch 18: Val Accuracy = 0.73 AUC = 0.75 Loss = 0.61
Epoch 19: Val Accuracy = 0.73 AUC = 0.74 Loss = 0.61
Epoch