# Dataset Description

## Preprocessed data taken from a study by Nanyang Technological University, Singapore

https://arxiv.org/abs/2106.00613



# Load Dataset from Google Drive

In [None]:
from google.colab import drive
from keras.optimizers import Adam
from keras.losses import SparseCategoricalCrossentropy
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import tensorflow as tf
from keras.callbacks import EarlyStopping
from tabulate import tabulate
import matplotlib.pyplot as plt
import pandas as pd
import os
from scipy.io import loadmat
import numpy as np
from keras import layers, Model
from google.colab import files
from keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from keras.metrics import AUC, Precision, Recall

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the dataset
# file stored on Drive can be read with ordinary file paths.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Path to the preprocessed dataset (.mat) on the mounted Google Drive.
# Adjust this to wherever the file lives in your own Drive.
mat_file_path = '/content/drive/MyDrive/Capstone/dataset.mat'

# Load the .mat file with scipy.io.loadmat; the individual arrays are looked up
# by variable name ('EEGsample', 'subindex', 'substate') further down.
mat_data = loadmat(mat_file_path)


# Build the Transformer Model

In [None]:
def get_positional_encoding(seq_length, d_model):
    """Build a fixed sinusoidal positional-encoding table.

    Even feature columns hold ``sin(position * frequency)`` and odd feature
    columns hold ``cos(position * frequency)``, with frequencies decaying
    geometrically as ``10000 ** (-2i / d_model)``.

    Args:
        seq_length: number of positions (rows) in the table.
        d_model: number of features (columns) in the table; may be odd.

    Returns:
        A ``tf.float32`` tensor of shape ``(seq_length, d_model)``.
    """
    position = np.arange(seq_length)[:, np.newaxis]
    div_term = np.exp(np.arange(0, d_model, 2) * -(np.log(10000.0) / d_model))
    # One angle column per sine column: shape (seq_length, ceil(d_model / 2)).
    angles = position * div_term

    pos_enc = np.zeros((seq_length, d_model))
    pos_enc[:, 0::2] = np.sin(angles)
    # When d_model is odd there is one fewer cosine column than sine columns,
    # so trim the angles to avoid a shape mismatch on assignment.
    pos_enc[:, 1::2] = np.cos(angles[:, : d_model // 2])
    return tf.convert_to_tensor(pos_enc, dtype=tf.float32)

class PositionalEncoding(layers.Layer):
    """Keras layer that adds a fixed sinusoidal positional encoding to its input.

    The encoding table is computed once at construction time by
    ``get_positional_encoding`` and is not trainable.

    Args:
        seq_length: number of positions the encoding table is built for.
        d_model: number of features in the encoding table.
        **kwargs: standard Keras ``Layer`` keyword arguments (e.g. ``name``),
            forwarded to the base class.
    """

    def __init__(self, seq_length, d_model, **kwargs):
        super().__init__(**kwargs)
        # Kept so that get_config() can describe how to rebuild the layer.
        self.seq_length = seq_length
        self.d_model = d_model
        self.pos_encoding = get_positional_encoding(seq_length, d_model)

    def call(self, inputs):
        """Return ``inputs`` with the encoding table added element-wise.

        Assumes the trailing two axes of ``inputs`` are (seq_length, d_model)
        so the table broadcasts over any leading axes.
        """
        return inputs + self.pos_encoding

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"seq_length": self.seq_length, "d_model": self.d_model})
        return config

class FeedForward(layers.Layer):
    """Two-layer dense sub-block: expand to ``dff`` units with ReLU, then
    project back to ``d_model`` units with no activation.

    Args:
        d_model: output width of the second dense layer.
        dff: hidden width of the first dense layer.
        name: Keras layer name.
    """

    def __init__(self, d_model, dff, name="feedforward"):
        super().__init__(name=name)
        self.dense1 = layers.Dense(units=dff, activation='relu')
        self.dense2 = layers.Dense(units=d_model)

    def call(self, inputs):
        """Apply the hidden (ReLU) layer followed by the linear projection."""
        return self.dense2(self.dense1(inputs))

class EncoderLayer(layers.Layer):
    """One Transformer encoder block.

    Self-attention and a feed-forward sub-block, each followed by dropout, a
    residual connection and layer normalization (post-norm arrangement).

    Args:
        d_model: feature width of the block's input and output; also used as
            the per-head ``key_dim`` of the attention layer.
        num_heads: number of attention heads.
        dff: hidden width of the feed-forward sub-block.
        rate: dropout rate applied after attention and after the feed-forward
            sub-block.
        name: Keras layer name.
    """

    def __init__(self, d_model, num_heads, dff, rate=0.1, name="encoder_layer"):
        super(EncoderLayer, self).__init__(name=name)
        self.mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.ffn = FeedForward(d_model, dff)
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Run the block on ``inputs``.

        Args:
            inputs: tensor attended over itself (query, value and key are all
                ``inputs``).
            training: forwarded to the dropout layers. Defaults to ``None`` so
                the layer can also be called without an explicit flag, in which
                case Keras decides the mode.

        Returns:
            Tensor produced by the second layer normalization.
        """
        # Self-attention sub-block with residual connection.
        attn_output = self.mha(inputs, inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        # Feed-forward sub-block with residual connection.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        output = self.layernorm2(out1 + ffn_output)
        return output

class EEGTransformer(Model):
    """Transformer-encoder classifier for windowed EEG signals.

    Pipeline: Conv1D projection to ``d_model`` features -> sinusoidal
    positional encoding -> ``num_layers`` encoder blocks -> dropout ->
    global average pooling over time -> softmax classifier.

    Args:
        seq_length: number of time steps per input window.
        d_model: feature width used inside the encoder stack.
        num_heads: attention heads per encoder block.
        num_layers: number of stacked encoder blocks.
        num_classes: size of the softmax output.
        dff: hidden width of each encoder block's feed-forward sub-block.
        dropout_rate: dropout rate used in the encoder blocks and before pooling.
    """

    def __init__(self, seq_length, d_model, num_heads, num_layers, num_classes, dff, dropout_rate):
        super().__init__()
        self.conv1d = layers.Conv1D(d_model, kernel_size=10, padding='same', activation='relu')
        self.positional_encoding = PositionalEncoding(seq_length, d_model)
        # Give every encoder block its own name; otherwise all of them share
        # the default "encoder_layer" name.
        self.encoder_layers = [
            EncoderLayer(d_model, num_heads, dff, dropout_rate, name=f"encoder_layer_{i}")
            for i in range(num_layers)
        ]
        self.dropout = layers.Dropout(dropout_rate)
        self.global_average_pooling = layers.GlobalAveragePooling1D()
        self.classification_layer = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=None):
        """Compute class probabilities for a batch of EEG windows.

        Args:
            inputs: tensor whose last axis is the convolution's channel axis;
                presumably (batch, seq_length, channels) — confirm with caller.
            training: forwarded to the encoder blocks and dropout. Defaults to
                ``None`` so the model can be called without an explicit flag.

        Returns:
            Softmax output of the classification layer, one row per sample.
        """
        # Project to d_model features first: the positional-encoding table is
        # (seq_length, d_model), so it can only be added once the feature axis
        # has width d_model. Adding it to the raw inputs only works when the
        # number of input channels happens to equal d_model.
        x = self.conv1d(inputs)
        x = self.positional_encoding(x)
        for encoder_layer in self.encoder_layers:
            x = encoder_layer(x, training=training)
        x = self.dropout(x, training=training)
        x = self.global_average_pooling(x)
        output = self.classification_layer(x)
        return output

# Split and Train the Data

In [None]:
# Pull the arrays used for training out of the loaded .mat contents.
# EEG windows; shape is (2022, 30, 384) per the shape check in the next cell
# (presumably samples x channels x time points — axis 1 is indexed with
# channel indices during training).
EEGsample = mat_data['EEGsample']
# Per-sample subject ID; compared against each subject ID to select that
# subject's samples.
subindex = mat_data['subindex']
# Per-sample class label; used as the target `y` during training.
substate = mat_data['substate']



In [None]:
# Sanity check: display the dimensions of the EEG array.
EEGsample.shape


(2022, 30, 384)

In [None]:


# Per-subject evaluation: for every subject, train a fresh EEGTransformer on
# each split of a repeated stratified k-fold and average the test metrics.

# Seed every source of randomness so a fresh "Restart & Run All" is repeatable.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Experiment configuration.
RESULT_COLUMNS = ['Subject', 'Average Accuracy', 'Average Loss', 'Average F1 Score', 'Average AUC Score']
CHANNEL_INDICES = [24, 27, 28, 29]  # subset of the EEG channels fed to the model
BATCH_SIZE = 32
MAX_EPOCHS = 100
LEARNING_RATE = 0.00008
VALIDATION_FRACTION = 0.2

# Define the number of repeats and the number of folds
n_repeats = 5
n_splits = 5
rskf = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=SEED)

# Keras resets the callback's internal state at the start of every fit(), so a
# single instance can be shared by all folds.
early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=0, restore_best_weights=True)

# One dict per subject; turned into a DataFrame once after the loop
# (DataFrame.append is deprecated and was removed in pandas 2.0).
results_records = []

# Iterate through every subject ID present in the dataset
for patient_id in sorted(int(subject) for subject in np.unique(subindex)):
    print(f"Processing subject {patient_id}")

    # Extract data for the current patient
    patient_mask = np.squeeze(subindex == patient_id)

    # Keep the selected channels and move them to the last axis:
    # (samples, channels, time) -> (samples, time, channels).
    X = np.transpose(EEGsample[patient_mask][:, CHANNEL_INDICES, :], (0, 2, 1)).astype(np.float32)
    # Flatten labels to 1-D so scikit-learn splitters and metrics accept them
    # without column-vector warnings.
    y = substate[patient_mask].ravel().astype(np.int32)

    # Initialize lists to store the fold accuracies, losses, F1 scores, and AUC scores
    fold_accuracies = []
    fold_losses = []
    fold_f1_scores = []
    fold_auc_scores = []

    # Iterate through the repeats of the k-fold cross-validation
    for train_index, test_index in rskf.split(X, y):

        # Drop Keras state left over from the previous fold's model so memory
        # does not grow across the many models built in this cell.
        tf.keras.backend.clear_session()

        model = EEGTransformer(
            seq_length=X.shape[1],
            d_model=4,
            num_heads=8,
            num_layers=4,
            num_classes=2,
            dff=128,
            dropout_rate=0.3
        )

        # Compile the model
        model.compile(
            optimizer=Adam(learning_rate=LEARNING_RATE),
            loss=SparseCategoricalCrossentropy(),
            metrics=['accuracy'],
        )

        # Create the training and test sets
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Further split the training data into training (80%) and validation (20%)
        # sets, stratified so both keep the class balance of the fold.
        X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train,
            test_size=VALIDATION_FRACTION,
            stratify=y_train,
            random_state=SEED,
        )

        train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(BATCH_SIZE)
        val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(BATCH_SIZE)
        test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(BATCH_SIZE)

        # Train the model
        history = model.fit(
            train_dataset,
            validation_data=val_dataset,
            epochs=MAX_EPOCHS,
            verbose=0,
            callbacks=[early_stop],
        )

        # Evaluate the model on the test set
        test_loss, test_accuracy = model.evaluate(test_dataset, verbose=0)

        # Class probabilities and hard predictions for the test data
        y_pred_raw = model.predict(X_test, verbose=0)
        y_pred = np.argmax(y_pred_raw, axis=-1)

        # F1 is computed from hard labels; ROC AUC must be computed from the
        # positive-class probability. Feeding it argmax labels collapses the
        # ROC curve to a single operating point.
        test_f1_score = f1_score(y_test, y_pred, average='weighted')
        test_auc_score = roc_auc_score(y_test, y_pred_raw[:, 1])

        # Append the fold metrics to their respective lists
        fold_accuracies.append(test_accuracy)
        fold_losses.append(test_loss)
        fold_f1_scores.append(test_f1_score)
        fold_auc_scores.append(test_auc_score)

    # Compute the per-subject averages over all folds and repeats
    avg_accuracy = np.mean(fold_accuracies)
    avg_loss = np.mean(fold_losses)
    avg_f1_score = np.mean(fold_f1_scores)
    avg_auc_score = np.mean(fold_auc_scores)

    # Record this subject's summary row
    results_records.append({
        'Subject': patient_id,
        'Average Accuracy': avg_accuracy,
        'Average Loss': avg_loss,
        'Average F1 Score': avg_f1_score,
        'Average AUC Score': avg_auc_score
    })

# Build the results table once from the collected rows
results_df = pd.DataFrame(results_records, columns=RESULT_COLUMNS)

# Print the DataFrame
print(results_df)
results_df.to_excel('results.xlsx', index=False)
# Download the Excel file (`files` comes from the import cell at the top)
files.download('results.xlsx')


Processing subject 1


  results_df = results_df.append({


Processing subject 2


  results_df = results_df.append({


Processing subject 3


  results_df = results_df.append({


Processing subject 4


  results_df = results_df.append({


Processing subject 5


  results_df = results_df.append({


Processing subject 6


  results_df = results_df.append({


Processing subject 7


  results_df = results_df.append({


Processing subject 8


  results_df = results_df.append({


Processing subject 9


  results_df = results_df.append({


Processing subject 10


  results_df = results_df.append({


Processing subject 11
    Subject  Average Accuracy  Average Loss  Average F1 Score  \
0       1.0          0.749815      0.536685          0.733488   
1       2.0          0.619430      0.659216          0.574406   
2       3.0          0.604000      0.654126          0.558287   
3       4.0          0.602759      0.653635          0.562037   
4       5.0          0.721434      0.540129          0.686612   
5       6.0          0.824492      0.411900          0.815322   
6       7.0          0.584286      0.673924          0.518093   
7       8.0          0.702424      0.578693          0.678171   
8       9.0          0.801147      0.443706          0.788114   
9      10.0          0.756190      0.499018          0.726607   
10     11.0          0.720831      0.523856          0.700743   

    Average AUC Score  
0            0.749825  
1            0.621538  
2            0.604000  
3            0.600095  
4            0.720830  
5            0.823382  
6            0.583636  
7    

  results_df = results_df.append({


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

NameError: ignored