In [2]:
import pandas as pd
import numpy as np

In [11]:
eeg_data = pd.read_csv("eeg_data/eeg_dataset.csv")
eeg_df = pd.DataFrame(eeg_data)
diagnoses = eeg_df['specific.disorder'].unique()
print(diagnoses)

ptsd_data = eeg_df[eeg_df['specific.disorder'].isin(['Posttraumatic stress disorder', 'Healthy control'])]
ptsd_data = ptsd_data.reset_index(drop=True)
ptsd_data = ptsd_data.ffill()
ptsd_data.head()

['Alcohol use disorder' 'Acute stress disorder' 'Depressive disorder'
 'Healthy control' 'Behavioral addiction disorder'
 'Obsessive compulsitve disorder' 'Schizophrenia' 'Panic disorder'
 'Social anxiety disorder' 'Posttraumatic stress disorder'
 'Adjustment disorder' 'Bipolar disorder']


Unnamed: 0,no.,sex,age,eeg.date,education,IQ,main.disorder,specific.disorder,AB.A.delta.a.FP1,AB.A.delta.b.FP2,...,COH.F.gamma.o.Pz.p.P4,COH.F.gamma.o.Pz.q.T6,COH.F.gamma.o.Pz.r.O1,COH.F.gamma.o.Pz.s.O2,COH.F.gamma.p.P4.q.T6,COH.F.gamma.p.P4.r.O1,COH.F.gamma.p.P4.s.O2,COH.F.gamma.q.T6.r.O1,COH.F.gamma.q.T6.s.O2,COH.F.gamma.r.O1.s.O2
0,114,F,34.16,2018.6.29,18.0,119.0,Healthy control,Healthy control,72.431249,69.386059,...,78.99716,67.622353,70.376986,66.281673,82.552243,57.823441,69.899527,52.622502,73.606786,64.037927
1,115,F,42.35,2018.6.29,16.0,110.0,Healthy control,Healthy control,19.579138,19.703305,...,67.123295,38.645146,47.996626,48.602407,66.139329,40.072587,62.466377,27.778563,59.736363,47.235652
2,116,F,35.16,2018.6.30,16.0,122.0,Healthy control,Healthy control,17.916294,22.911557,...,92.844007,81.67353,78.755278,83.905221,87.90345,75.363909,85.757201,73.195374,85.954023,82.878713
3,117,F,25.65,2018.7.2,18.0,118.0,Healthy control,Healthy control,20.081892,17.928614,...,99.447826,99.424714,44.181841,61.683927,99.513428,43.007308,62.525177,42.766646,62.843978,39.895496
4,118,F,18.89,2018.7.3,12.0,108.0,Healthy control,Healthy control,18.429434,26.156602,...,79.946089,72.771884,64.189258,69.407868,61.586105,45.679693,51.780683,61.221166,75.595992,69.627654


**Data Information:**  
This dataset contains the absolute PSD values for each channel (already preprocessed), as well as the coherence values between all channels.
For my first analysis, I want to use data from only 19 relevant selected channels. I also don't want the coherence values just yet -- apparently, the neural networks have much better accuracy with just the PSD values. For PTSD, the best feature was the beta PSD value (not sure exactly which PSD channel, several of them are beta).

In [12]:
# print(ptsd_data.columns)
# psd_columns = [col for col in ptsd_data.columns if "psd" in col]
# print(psd_columns)

# alpha_columns = [col for col in ptsd_data.columns if "alpha" in col]
# print(alpha_columns)

selected_channels = ['FP1', 'FP2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'T3', 'C3', 'Cz', 'C4', 'T4', 'T5', 'P3', 'Pz', 'P4', 'T6', 'O1', 'O2']

channels = ['age', 'education', 'IQ'] # starting off with these values
labels = ptsd_data['specific.disorder']

for channel in selected_channels:
    select = [col for col in ptsd_data.columns if channel in col and "COH" not in col] # don't want coherence values
    for column in select:
        channels.append(column)
        
eeg_select = ptsd_data[channels]

print(eeg_select)

       age  education     IQ  AB.A.delta.a.FP1  AB.B.theta.a.FP1  \
0    34.16       18.0  119.0         72.431249         51.701754   
1    42.35       16.0  110.0         19.579138         11.377117   
2    35.16       16.0  122.0         17.916294         17.258618   
3    25.65       18.0  118.0         20.081892          8.278579   
4    18.89       12.0  108.0         18.429434         18.166703   
..     ...        ...    ...               ...               ...   
142  22.00       13.0  116.0         41.851823         31.873081   
143  26.00       13.0  118.0         18.986856         10.671751   
144  26.00       16.0  113.0         28.781317         25.481891   
145  24.00       13.0  107.0         19.929100         13.723550   
146  21.00       13.0  105.0         65.195346         35.700361   

     AB.C.alpha.a.FP1  AB.D.beta.a.FP1  AB.E.highbeta.a.FP1  AB.F.gamma.a.FP1  \
0          139.054594        21.060727             4.011234          3.303234   
1            5.907418

In [13]:
eeg_select = eeg_select.ffill()

In [14]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
labels = encoder.fit_transform(labels)

In [15]:
print(labels)
print(len(labels))

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
147


Control = 0, PTSD = 1.

First: Logistic Regression

In [16]:
import numpy as np
import pandas as pd
import time as time
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

def logisticregression(max_iter, X_train, X_test, Y_train, Y_test, label):

    t0 = time.time()

    clf = LogisticRegression(max_iter=max_iter)
    clf = clf.fit(X_train, Y_train)
    print("Logistic Fit in Time: %0.3f" % (time.time() - t0))

    t0 = time.time()
    pred = clf.predict(X_test)

    print("Prediction done in %0.3fs" % (time.time() - t0))
    prediction_error = np.sum(pred != Y_test) / len(Y_test)

    print("prediction error: %0.3f" % prediction_error)

    accuracy = 100 - (prediction_error*100)

    print(f"accuracy for {label}: {accuracy}%")

Subcategory: All absolute PSD features

In [17]:
# Splitting the data:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(eeg_select, labels, test_size = 0.2, random_state=42)

# Run the model:
logisticregression(1000, X_train, X_test, Y_train, Y_test, label="all absolute PSD features")

Logistic Fit in Time: 0.088
Prediction done in 0.001s
prediction error: 0.133
accuracy for all absolute PSD features: 86.66666666666667%


Subcategory: all features

In [20]:
ptsd_data_clean = ptsd_data.drop(columns=['sex', 'eeg.date', 'main.disorder', 'specific.disorder'])
ptsd_data_clean = ptsd_data_clean.fillna(0)

X_train, X_test, Y_train, Y_test = train_test_split(ptsd_data_clean, labels, test_size = 0.2, random_state=42)

logisticregression(1000, X_train, X_test, Y_train, Y_test, label="all features")

Logistic Fit in Time: 0.073
Prediction done in 0.005s
prediction error: 0.267
accuracy for all features: 73.33333333333333%


Subcategory: how about with no IQ?

In [134]:
eeg_ptsd_noIQ = eeg_select.drop(columns=['IQ'])

eeg_ptsd_noIQ = eeg_ptsd_noIQ.ffill()

X_train_noIQ, X_test_noIQ, Y_train, Y_test = train_test_split(eeg_ptsd_noIQ, labels, test_size = 0.2, random_state=42)

logisticregression(1000, X_train_noIQ, X_test_noIQ, Y_train, Y_test, label="no IQ")

Logistic Fit in Time: 0.091
Prediction done in 0.000s
prediction error: 0.167
accuracy for no IQ: 83.33333333333334%


**Affected MORE by change in test size (greater error compared to no IQ when test size is increased to 0.5). Otherwise, including IQ improves the performance by about 3%.**

Subcategory: most relevant (beta PSD)

In [142]:
ptsd_beta_PSD = eeg_select[['age', 'education', 'IQ'] + [col for col in eeg_select.columns if "beta" in col and "high" not in col]]

X_train_betaPSD, X_test_betaPSD, Y_train, Y_test = train_test_split(ptsd_beta_PSD, labels, test_size = 0.2, random_state=42)

logisticregression(1000, X_train_betaPSD, X_test_betaPSD, Y_train, Y_test, label="just absolute PSD features for the beta frequency")

Logistic Fit in Time: 0.059
Prediction done in 0.001s
prediction error: 0.100
accuracy for just absolute PSD features for the beta frequency: 90.0%


In [144]:
# What if we try JUST using the EEG signal, and also JUST using the age, education and IQ?

just_eeg = eeg_select.iloc[:, 3:]
X_train_justEEG, X_test_justEEG, Y_train, Y_test = train_test_split(just_eeg, labels, test_size = 0.2, random_state=42)
logisticregression(1000, X_train_justEEG, X_test_justEEG, Y_train, Y_test, label="just EEG")

Logistic Fit in Time: 0.129
Prediction done in 0.001s
prediction error: 0.433
accuracy for just EEG: 56.666666666666664%


In [147]:
just_first3 = eeg_select.iloc[:, :3]
X_train_just_first3, X_test_just_first3, Y_train, Y_test = train_test_split(just_eeg, labels, test_size = 0.2, random_state=42)
logisticregression(1000, X_train_just_first3, X_test_just_first3, Y_train, Y_test, label="just age, education and IQ")

Logistic Fit in Time: 0.113
Prediction done in 0.000s
prediction error: 0.433
accuracy for just age, education and IQ: 56.666666666666664%


For both of those, it just turns into a Naive Bayes.

Second: 3-layer NN

Third: CNN

Fourth: Convolutional layer + transformer

In [29]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, BatchNormalization
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import layers, models

Trying just the absolute PSD (no COH values yet)

In [30]:
input_shape = (117, 1)

In [31]:
def transformer_model(input_shape):
    model = Sequential()
    inputs = Input(shape=input_shape)

    x = layers.Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(inputs)
    x = layers.MaxPooling1D(pool_size=2)(x)
    x = layers.LayerNormalization()(x)

    # MultiHeadAttention layer (Using functional API)
    attention_output = layers.MultiHeadAttention(num_heads=4, key_dim=64)(x, x) 
    x = layers.Dropout(0.4)(attention_output)
    x = layers.LayerNormalization()(x)

    # Fully Connected Layer
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.4)(x)

    # Global Average Pooling
    x = layers.GlobalAveragePooling1D()(x)

    # Output Layer
    outputs = layers.Dense(1, activation='sigmoid')(x)

    # Create the model
    model = models.Model(inputs=inputs, outputs=outputs)

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [37]:
X_train, X_test, Y_train, Y_test = train_test_split(eeg_select, labels, test_size = 0.4, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_test, Y_test, test_size=0.5, random_state=42)

X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
X_test = X_test.to_numpy()

X_train_reshaped = X_train.reshape(X_train.shape[0], 117, 1)
X_val_reshaped = X_val.reshape(X_val.shape[0], 117, 1)
X_test_reshaped = X_test.reshape(X_test.shape[0], 117, 1)

model = transformer_model(input_shape=(117,1))

model.fit(X_train_reshaped, Y_train, epochs=10, batch_size=8, validation_data=(X_val_reshaped, Y_val))

scores = model.evaluate(X_test_reshaped, Y_test, verbose = 0)
print("Transformer Error: %.2f%%" % (100-scores[1]*100))

model.summary()

Epoch 1/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - accuracy: 0.5207 - loss: 0.7966 - val_accuracy: 0.5862 - val_loss: 0.6876
Epoch 2/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5298 - loss: 0.6950 - val_accuracy: 0.2759 - val_loss: 0.7042
Epoch 3/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5673 - loss: 0.6734 - val_accuracy: 0.7241 - val_loss: 0.6001
Epoch 4/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5483 - loss: 0.7128 - val_accuracy: 0.7241 - val_loss: 0.6648
Epoch 5/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6298 - loss: 0.6756 - val_accuracy: 0.7241 - val_loss: 0.6172
Epoch 6/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6247 - loss: 0.6705 - val_accuracy: 0.7241 - val_loss: 0.6365
Epoch 7/10
[1m11/11[0m [32m━━━━━━━━━

Now adding positional embeddings:

In [38]:
# Define Positional Embedding Layer
def get_positional_embeddings(seq_len, embed_dim):
   # Generate a range of positions
    positions = tf.range(seq_len, dtype=tf.float32)  # Shape (seq_len,)
    
    # Create div_term
    div_term = tf.exp(tf.cast(tf.range(0, embed_dim, 2), tf.float32) * -(tf.math.log(10000.0) / embed_dim))  # Shape (embed_dim // 2,)
    
    # Create sinusoidal positional encodings
    sinusoids = tf.reshape(positions, [-1, 1]) * div_term  # Shape (seq_len, embed_dim // 2)
    
    # Compute sine and cosine parts
    pos_encoding = tf.concat([tf.sin(sinusoids), tf.cos(sinusoids)], axis=-1)  # Shape (seq_len, embed_dim)
    
    return pos_encoding

In [39]:
def transformer_model_pos(input_shape, embed_dim):
    model = Sequential()
    inputs = Input(shape=input_shape)

    x = layers.Conv1D(filters=20, kernel_size=3, activation='relu', padding='same')(inputs)
    x = layers.MaxPooling1D(pool_size=2)(x)

    # Positional Embeddings
    seq_len = input_shape[0]  # 117 (sequence length)
    positional_embeddings = get_positional_embeddings(seq_len, embed_dim)
    positional_embeddings = tf.expand_dims(positional_embeddings, axis=0)  # Shape (1, seq_len, embed_dim)
    
    # Embedding layer: Change input shape to [batch_size, seq_len, embed_dim]
    x = layers.Dense(embed_dim)(inputs)  # Project input to `embed_dim` dimensionality
    x = layers.Add()([x, positional_embeddings])  # Add positional embeddings to the input

    # Transformer Block
    x = layers.LayerNormalization()(x)
    attention_output = layers.MultiHeadAttention(num_heads=4, key_dim=64)(x, x) 
    x = layers.Add()([x, attention_output])  # Add residual connection
    x = layers.LayerNormalization()(x)

    # Fully Connected Layer
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(embed_dim)(x)
    x = layers.Add()([x, attention_output])  # Add residual connection

    x = layers.GlobalAveragePooling1D()(x)  # Shape (batch_size, embed_dim)

    # Output Layer
    outputs = layers.Dense(1, activation='sigmoid')(x)

    # Create the model
    model = models.Model(inputs=inputs, outputs=outputs)

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [43]:
model = transformer_model_pos(input_shape=(117, 1), embed_dim = 64)

model.fit(X_train_reshaped, Y_train, epochs=10, batch_size=8, validation_data=(X_val_reshaped, Y_val))

scores = model.evaluate(X_test_reshaped, Y_test, verbose = 0)
print("Transformer Error with Positional Embedding: %.2f%%" % (100-scores[1]*100))

model.summary()

Epoch 1/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 59ms/step - accuracy: 0.4068 - loss: 0.9677 - val_accuracy: 0.2759 - val_loss: 0.8496
Epoch 2/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.4863 - loss: 0.7640 - val_accuracy: 0.7241 - val_loss: 0.5821
Epoch 3/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.6330 - loss: 0.7153 - val_accuracy: 0.6897 - val_loss: 0.6742
Epoch 4/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.5384 - loss: 0.6723 - val_accuracy: 0.7241 - val_loss: 0.6010
Epoch 5/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.6465 - loss: 0.6620 - val_accuracy: 0.6897 - val_loss: 0.6128
Epoch 6/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.5666 - loss: 0.6922 - val_accuracy: 0.6552 - val_loss: 0.6167
Epoch 7/10
[1m11/11[0m [32m━━━━

Subcategory: PE with ALL features

In [45]:
ptsd_data_clean = ptsd_data.drop(columns=['sex', 'eeg.date', 'main.disorder', 'specific.disorder'])

X_train, X_test, Y_train, Y_test = train_test_split(ptsd_data_clean, labels, test_size = 0.4, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_test, Y_test, test_size=0.5, random_state=42)

X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
X_test = X_test.to_numpy()

X_train_reshaped = X_train.reshape(X_train.shape[0], 1145, 1)
X_val_reshaped = X_val.reshape(X_val.shape[0], 1145, 1)
X_test_reshaped = X_test.reshape(X_test.shape[0], 1145, 1)

model = transformer_model_pos(input_shape=(1145, 1), embed_dim = 64)

model.fit(X_train_reshaped, Y_train, epochs=10, batch_size=8, validation_data=(X_val_reshaped, Y_val))

scores = model.evaluate(X_test_reshaped, Y_test, verbose = 0)
print("Transformer Error with Positional Embedding: %.2f%%" % (100-scores[1]*100))

model.summary()

Epoch 1/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 360ms/step - accuracy: 0.5903 - loss: nan - val_accuracy: 0.7241 - val_loss: nan
Epoch 2/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 326ms/step - accuracy: 0.6200 - loss: nan - val_accuracy: 0.7241 - val_loss: nan
Epoch 3/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 382ms/step - accuracy: 0.6091 - loss: nan - val_accuracy: 0.7241 - val_loss: nan
Epoch 4/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 453ms/step - accuracy: 0.5913 - loss: nan - val_accuracy: 0.7241 - val_loss: nan
Epoch 5/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 454ms/step - accuracy: 0.5646 - loss: nan - val_accuracy: 0.7241 - val_loss: nan
Epoch 6/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 483ms/step - accuracy: 0.5765 - loss: nan - val_accuracy: 0.7241 - val_loss: nan
Epoch 7/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

Subcategory: Positional Embedding on Beta PSD only

In [48]:
ptsd_beta_PSD = eeg_select[['age', 'education', 'IQ'] + [col for col in eeg_select.columns if "beta" in col and "high" not in col]]

X_train_betaPSD, X_test_betaPSD, Y_train, Y_test = train_test_split(ptsd_beta_PSD, labels, test_size = 0.4, random_state=42)
X_val_betaPSD, X_test_betaPSD, Y_val, Y_test = train_test_split(X_test_betaPSD, Y_test, test_size=0.5, random_state=42)

X_train_betaPSD = X_train_betaPSD.to_numpy()
X_val_betaPSD = X_val_betaPSD.to_numpy()
X_test_betaPSD = X_test_betaPSD.to_numpy()

X_train_reshaped = X_train_betaPSD.reshape(X_train.shape[0], 22, 1)
X_val_reshaped = X_val_betaPSD.reshape(X_val.shape[0], 22, 1)
X_test_reshaped = X_test_betaPSD.reshape(X_test.shape[0], 22, 1)

model = transformer_model_pos(input_shape = (22, 1), embed_dim=64)

model.fit(X_train_reshaped, Y_train, epochs=10, batch_size=8, validation_data=(X_val_reshaped, Y_val))

scores = model.evaluate(X_test_reshaped, Y_test, verbose = 0)
print("Transformer Error on beta PSD: %.2f%%" % (100-scores[1]*100))

model.summary()

Epoch 1/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 53ms/step - accuracy: 0.4765 - loss: 0.8432 - val_accuracy: 0.7241 - val_loss: 0.5860
Epoch 2/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6044 - loss: 0.6361 - val_accuracy: 0.7241 - val_loss: 0.5839
Epoch 3/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5121 - loss: 0.7691 - val_accuracy: 0.5172 - val_loss: 0.7388
Epoch 4/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6299 - loss: 0.6752 - val_accuracy: 0.7241 - val_loss: 0.6264
Epoch 5/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5166 - loss: 0.6858 - val_accuracy: 0.6207 - val_loss: 0.5933
Epoch 6/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6528 - loss: 0.6997 - val_accuracy: 0.6897 - val_loss: 0.6137
Epoch 7/10
[1m11/11[0m [32m━━━━━━━━━

What if we perform it on the dataset with the coherence values as well?

In [None]:
ptsd_data_clean = ptsd_data.drop(columns=['sex', 'eeg.date', 'main.disorder', 'specific.disorder'])

model = transformer_model_pos(input_shape=(117, 1), embed_dim = 64)

model.fit(X_train_reshaped, Y_train, epochs=10, batch_size=8, validation_data=(X_val_reshaped, Y_val))

scores = model.evaluate(X_test_reshaped, Y_test, verbose = 0)
print("Transformer Error with Positional Embedding: %.2f%%" % (100-scores[1]*100))

model.summary()

In [24]:
X_train, X_test, Y_train, Y_test = train_test_split(ptsd_data_clean, labels, test_size = 0.4, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_test, Y_test, test_size=0.5, random_state=42)


X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
X_test = X_test.to_numpy()


X_train_reshaped = X_train.reshape(X_train.shape[0], 1145, 1)
X_val_reshaped = X_val.reshape(X_val.shape[0], 1145, 1)
X_test_reshaped = X_test.reshape(X_test.shape[0], 1145, 1)

model = transformer_model_pos(input_shape=(1145, 1), embed_dim = 64)

model.fit(X_train_reshaped, Y_train, epochs=65, batch_size=8, validation_data=(X_val_reshaped, Y_val))

scores = model.evaluate(X_test_reshaped, Y_test, verbose = 0)
print("Transformer Error on PSD and COH data: %.2f%%" % (100-scores[1]*100))

model.summary()

NameError: name 'Sequential' is not defined

Now, just use the beta PSD:

In [60]:
ptsd_beta_PSD = eeg_select[['age', 'education', 'IQ'] + [col for col in eeg_select.columns if "beta" in col and "high" not in col]]

X_train_betaPSD, X_test_betaPSD, Y_train, Y_test = train_test_split(ptsd_beta_PSD, labels, test_size = 0.4, random_state=42)
X_val_betaPSD, X_test_betaPSD, Y_val, Y_test = train_test_split(X_test_betaPSD, Y_test, test_size=0.5, random_state=42)

X_train_betaPSD = X_train_betaPSD.to_numpy()
X_val_betaPSD = X_val_betaPSD.to_numpy()
X_test_betaPSD = X_test_betaPSD.to_numpy()

X_train_reshaped = X_train_betaPSD.reshape(X_train.shape[0], 22, 1)
X_val_reshaped = X_val_betaPSD.reshape(X_val.shape[0], 22, 1)
X_test_reshaped = X_test_betaPSD.reshape(X_test.shape[0], 22, 1)

model_final = transformer_model(input_shape = (22, 1))

model_final.fit(X_train_reshaped, Y_train, epochs=65, batch_size=8, validation_data=(X_val_reshaped, Y_val))

scores = model_final.evaluate(X_test_reshaped, Y_test, verbose = 0)
print("Transformer Error on beta PSD: %.2f%%" % (100-scores[1]*100))

model_final.summary()

Epoch 1/65
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 37ms/step - accuracy: 0.5649 - loss: 0.7582 - val_accuracy: 0.7241 - val_loss: 0.5927
Epoch 2/65
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5293 - loss: 0.7565 - val_accuracy: 0.7241 - val_loss: 0.6298
Epoch 3/65
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5903 - loss: 0.6678 - val_accuracy: 0.7241 - val_loss: 0.5918
Epoch 4/65
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6292 - loss: 0.6644 - val_accuracy: 0.7241 - val_loss: 0.5880
Epoch 5/65
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5956 - loss: 0.6769 - val_accuracy: 0.7931 - val_loss: 0.4716
Epoch 6/65
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6853 - loss: 0.6053 - val_accuracy: 0.9310 - val_loss: 0.2361
Epoch 7/65
[1m11/11[0m [32m━━━━━━━━━

# Feature Ablation:

In [73]:
def ablation_study(model, X_train, X_val, X_test, Y_train, Y_val, Y_test, feature_index):
    # Remove the feature (set to zero or ean) and observe change in prediction
    modified_Xtrain = X_train.copy()
    modified_Xval = X_val.copy()
    modified_Xtest = X_test.copy()

    modified_Xtrain[:, feature_index, :] = 0  # Example: set one feature to zero
    modified_Xval[:, feature_index, :] = 0
    modified_Xtest[:, feature_index, :] = 0

    model.fit(modified_Xtrain, Y_train, epochs=65, batch_size=8, validation_data=(modified_Xval, Y_val))

    feature_scores = model.evaluate(modified_Xtest, Y_test, verbose = 0)
    
    diff = np.sum(scores != feature_scores)
    return diff


In [76]:
curr_diff = 0
best_feature = None
for feature in range(len(X_train_reshaped[1])):
    diff = ablation_study(model_final, X_train_reshaped, X_val_reshaped, X_test_reshaped, Y_train, Y_val, Y_test, feature)
    if diff > curr_diff:
        curr_diff = diff
        best_feature = feature

print(best_feature)
print(curr_diff)

Epoch 1/65
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5663 - loss: 1.3639 - val_accuracy: 0.6897 - val_loss: 0.6401
Epoch 2/65
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7052 - loss: 0.7468 - val_accuracy: 0.5517 - val_loss: 1.4100
Epoch 3/65
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6836 - loss: 0.6766 - val_accuracy: 0.3103 - val_loss: 1.3996
Epoch 4/65
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6522 - loss: 0.7636 - val_accuracy: 0.7241 - val_loss: 1.0157
Epoch 5/65
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7221 - loss: 0.5891 - val_accuracy: 0.5517 - val_loss: 0.8157
Epoch 6/65
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7336 - loss: 0.4732 - val_accuracy: 0.7586 - val_loss: 0.8166
Epoch 7/65
[1m11/11[0m [32m━━━━━━━━━

In [80]:
ptsd_just_beta =eeg_select[[col for col in eeg_select.columns if "beta" in col and "high" not in col]]

X_train_betaPSD, X_test_betaPSD, Y_train, Y_test = train_test_split(ptsd_just_beta, labels, test_size = 0.4, random_state=42)
X_val_betaPSD, X_test_betaPSD, Y_val, Y_test = train_test_split(X_test_betaPSD, Y_test, test_size=0.5, random_state=42)

X_train_betaPSD = X_train_betaPSD.to_numpy()
X_val_betaPSD = X_val_betaPSD.to_numpy()
X_test_betaPSD = X_test_betaPSD.to_numpy()

X_train_reshaped = X_train_betaPSD.reshape(X_train.shape[0], 19, 1)
X_val_reshaped = X_val_betaPSD.reshape(X_val.shape[0], 19, 1)
X_test_reshaped = X_test_betaPSD.reshape(X_test.shape[0], 19, 1)

model_final = transformer_model(input_shape = (19, 1))

model_final.fit(X_train_reshaped, Y_train, epochs=65, batch_size=8, validation_data=(X_val_reshaped, Y_val))

scores = model_final.evaluate(X_test_reshaped, Y_test, verbose = 0)
print("Transformer Error on beta PSD: %.2f%%" % (100-scores[1]*100))

model_final.summary()

Epoch 1/65


LookupError: gradient registry has no entry for: shap_AddV2