In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
import pyarrow
import matplotlib.pyplot as pltz
import antropy as ant
from utils import *

In [3]:
train = pd.read_csv('train.csv')

In [4]:
train

Unnamed: 0,eeg_id,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,1628180742,0,0.0,353733,0,0.0,127492639,42516,Seizure,3,0,0,0,0,0
1,1628180742,1,6.0,353733,1,6.0,3887563113,42516,Seizure,3,0,0,0,0,0
2,1628180742,2,8.0,353733,2,8.0,1142670488,42516,Seizure,3,0,0,0,0,0
3,1628180742,3,18.0,353733,3,18.0,2718991173,42516,Seizure,3,0,0,0,0,0
4,1628180742,4,24.0,353733,4,24.0,3080632009,42516,Seizure,3,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106795,351917269,6,12.0,2147388374,6,12.0,4195677307,10351,LRDA,0,0,0,3,0,0
106796,351917269,7,14.0,2147388374,7,14.0,290896675,10351,LRDA,0,0,0,3,0,0
106797,351917269,8,16.0,2147388374,8,16.0,461435451,10351,LRDA,0,0,0,3,0,0
106798,351917269,9,18.0,2147388374,9,18.0,3786213131,10351,LRDA,0,0,0,3,0,0


In [5]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [6]:
eeg1 = pd.read_parquet('train_eegs/351917269.parquet', engine = 'pyarrow')

### Preprocessing Data and Building Features  

Loading in the first 5k sub EEGs, replacing null values with the column median, standardizing the data, and running PCA to reduce the dimensions of the sub EEGs from 20 columns to 8 columns. Then using my functions to calculate the alpha and beta bands for each column and hjorth mobility and complexity for each column.

In [7]:
mobs, coms = [], []
alphas, betas = [], []

for i in range(5000):
    sub_eeg = preprocess(get_sub_eeg(train, i))
    pca_eeg = principal_components(sub_eeg, 8)
    f, pxx = welch(pca_eeg)
    alphas.append(get_band_features(f, pxx)[0])
    betas.append(get_band_features(f, pxx)[1])
    mobs.append(mobility_complexity(pca_eeg)[0])
    coms.append(mobility_complexity(pca_eeg)[1])

Building Dataframe

In [8]:
mobility_cols = ['component{}_mobility'.format(i + 1) for i in range(8)]
complexity_cols = ['component{}_complexity'.format(i + 1) for i in range(8)]
alpha_cols = ['component{}_alpha'.format(i + 1) for i in range(8)]
beta_cols = ['component{}_beta'.format(i + 1) for i in range(8)]

In [9]:
mob_df = pd.DataFrame(mobs, columns = mobility_cols)
com_df = pd.DataFrame(coms, columns = complexity_cols)
alpha_df = pd.DataFrame(alphas, columns = alpha_cols)
beta_df = pd.DataFrame(betas, columns = beta_cols)

In [10]:
band_df = pd.concat([alpha_df, beta_df], axis = 1)

In [11]:
hjorth_df = pd.concat([mob_df, com_df], axis = 1)

In [29]:
from sklearn.preprocessing import LabelEncoder

In [12]:
target = train['expert_consensus'].iloc[0:5000]

In [30]:
le = LabelEncoder()
target = le.fit_transform(target)

In [32]:
len(target)

5000

Splitting data into training and testing data.

In [33]:
from sklearn.model_selection import train_test_split

Running model with band features first.

In [34]:
band_train, band_test, y_train, y_test = train_test_split(band_df, target, test_size = 0.4)

In [35]:
y_train = pd.DataFrame(y_train).reset_index()
y_test = pd.DataFrame(y_test).reset_index()

In [36]:
y_train = y_train.drop(columns = 'index')

In [37]:
y_test = y_test.drop(columns = 'index')

In [39]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [40]:
from keras.models import Model
from keras.layers import Dense, Input

In [41]:
in_x = Input(shape = band_train.shape[1:])
x = Dense(12, input_dim=band_train.shape[1], activation='relu')(in_x)
x = Dense(8, activation='relu')(x)
x = Dense(8, activation='relu')(x)
out_x = Dense(6, activation='softmax')(x)

model = Model(in_x,out_x)

model.summary()

In [42]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [46]:
history=model.fit(band_train, 
                  y_train, 
                  validation_data=(band_test,y_test), 
                  epochs=100, batch_size=10)

Epoch 1/100


ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(10, 1), output.shape=(10, 6)