In [2]:
from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
import pandas
from imblearn.over_sampling import SMOTE
import scipy.io
import numpy as np
import pandas as pd

In [4]:
data = scipy.io.loadmat('data/x.mat')  
print(data['x'].shape) 

(2042, 105, 79)


In [5]:
columns = np.asarray([['Electrode %d - %d/2 Hz'%(i+1, j+1)] for i in range(data['x'].shape[1]) for j in range(data['x'].shape[2])])
columns

array([['Electrode 1 - 1/2 Hz'],
       ['Electrode 1 - 2/2 Hz'],
       ['Electrode 1 - 3/2 Hz'],
       ...,
       ['Electrode 105 - 77/2 Hz'],
       ['Electrode 105 - 78/2 Hz'],
       ['Electrode 105 - 79/2 Hz']], dtype='<U23')

In [6]:
labels = pd.read_csv("data/table_withlabels.csv")
foof = pd.read_csv("data/foof2features.csv")

In [7]:
df = pd.DataFrame(data['x'].reshape((data['x'].shape[0], -1)))
df.columns = columns
df['IDs'] = foof['C1']
df = pd.merge(df, labels[['label', 'IDs']], on='IDs', how='inner')
print(df['label'].value_counts())
dataset = df.values
print(dataset.shape)


Other Neurodevelopmental Disorders    492
ADHD-Inattentive Type                 388
ADHD-Combined Type                    376
Anxiety Disorders                     241
No Diagnosis Given                    203
Depressive Disorders                   85
Name: label, dtype: int64
(1785, 8297)


In [8]:
X = dataset[:,0:8295].astype(float)
y = dataset[:,8296]




# scaling the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)
#X = np.clip(X, -1, 1)

X.shape

(1785, 8295)

In [9]:

from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(y)
y = encoder.transform(y)
# convert integers to dummy variables (i.e. one hot encoded)
y = np_utils.to_categorical(y)

print(y)

[[0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 ...
 [0. 0. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]]


In [10]:
from sklearn.model_selection import train_test_split
trainX, testX, trainy, testy = train_test_split(X, y, stratify=y,test_size=0.3)

oversample = SMOTE() #oversample to aaccount for the data imbalance
trainX,trainy = oversample.fit_resample(trainX,trainy)
testX,testy = oversample.fit_resample(testX,testy)

print(trainX.shape)
testX.shape



(2064, 8295)




(888, 8295)

In [11]:


trainX = trainX.reshape((2064,105,79)) 
testX = testX.reshape((888,105,79)) 



In [12]:


from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

def evaluate(y_test, y_pred, show_cm=False):
    print(y_test)
    print(y_pred)
    y_test = np.argmax(y_test, axis=1) # assuming you have n-by-6 class_prob
    y_pred = np.argmax(y_pred, axis=1) # assuming you have n-by-6 class_prob
    print(y_test)
    print(y_pred)
    print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
    print("Precision:", metrics.precision_score(y_test, y_pred, average='macro'))
    print("Recall:", metrics.recall_score(y_test, y_pred, average='macro'))
#     print("ROC AUC:", metrics.roc_auc_score(y_test, y_pred, multi_class='ovo',))
    print("F1 score:", metrics.f1_score(y_test, y_pred, average='macro'))
#     print("Brier Score:", metrics.brier_score_loss(y_test, y_pred)) # only for binary classification
    labels = ['Other Neurodevelopmental Disorders', 'ADHD-Inattentive Type', 'ADHD-Combined Type', 'Anxiety Disorders', 'No Diagnosis Given', 'Depressive Disorders']
    if show_cm:
        cm = confusion_matrix(y_test, y_pred, labels=labels)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
        disp.plot()
        plt.tight_layout()
        plt.show()



In [13]:
trainX = np.transpose(trainX, (0, 2,1))
testX = np.transpose(testX, (0, 2,1))


In [14]:
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
print('Tensorflow Version: ',tensorflow.__version__)
from tensorflow.keras.layers import BatchNormalization

Tensorflow Version:  2.7.0


In [15]:
# Initialising the CNN
model = Sequential()
# Convolution
model.add(Conv1D(filters = 32, kernel_size =4, input_shape = (79, 105), activation = 'relu', padding = 'same'))
#model.add(Conv1D(filters = 32, kernel_size = 5, input_shape = (79, 32), activation = 'relu', padding = 'same'))
# Pooling
model.add(MaxPooling1D(pool_size = 2))
# Convolution
model.add(Conv1D(filters = 64, kernel_size = 3, input_shape = (39, 32), activation = 'relu', padding = 'same'))
##model.add(Conv1D(filters = 64, kernel_size = 3, input_shape = (39, 64), activation = 'relu', padding = 'same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size = 2))
model.add(Conv1D(filters =128, kernel_size = 3, input_shape = (19, 64), activation = 'relu', padding = 'same'))
##model.add(Conv1D(filters =128, kernel_size = 3, input_shape = (19, 128), activation = 'relu', padding = 'same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size = 2))
model.add(Conv1D(filters =256, kernel_size = 3, input_shape = (9, 128), activation = 'relu', padding = 'same'))
#model.add(Conv1D(filters =256, kernel_size = 3, input_shape = (9, 256), activation = 'relu', padding = 'same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size = 2))
model.add(Conv1D(filters =512, kernel_size = 3, input_shape = (4, 256), activation = 'relu', padding = 'same'))
#model.add(Conv1D(filters =512, kernel_size = 3, input_shape = (4, 512), activation = 'relu', padding = 'same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size = 2))
model.add(Conv1D(filters = 1024, kernel_size = 3, input_shape = (2, 512), activation = 'relu', padding = 'same'))
#model.add(Conv1D(filters = 1024, kernel_size = 3, input_shape = (2, 1028), activation = 'relu', padding = 'same'))
model.add(MaxPooling1D(pool_size = 2))
# Flattening
model.add(Flatten())
# Full connection
#model.add(Dense(units = 512, activation = 'relu'))
#model.add(Dropout(0.5))
#model.add(Dense(units = 128, activation = 'relu'))
# Add Dropout to prevent overfitting
#model.add(Dropout(0.5))
#model.add(Dense(units = 32, activation = 'relu'))
#model.add(Dropout(0.5))
model.add(Dense(units = 6, activation = 'softmax'))
# Compiling the CNN
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 79, 32)            13472     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 39, 32)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 39, 64)            6208      
                                                                 
 batch_normalization (BatchN  (None, 39, 64)           256       
 ormalization)                                                   
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 19, 64)           0         
 1D)                                                             
                                                        

In [16]:
# Initialising the CNN
model = Sequential()
# Convolution
model.add(Conv1D(filters = 32, kernel_size = 3, input_shape = (79, 105), activation = 'relu', padding = 'same'))
# Pooling
model.add(MaxPooling1D(pool_size = 2))
# Convolution
model.add(Conv1D(filters = 64, kernel_size = 3, input_shape = (39, 32), activation = 'relu', padding = 'same'))
model.add(Conv1D(filters = 64, kernel_size = 3, input_shape = (39, 64), activation = 'relu', padding = 'same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size = 2))
model.add(Conv1D(filters =128, kernel_size = 3, input_shape = (19, 64), activation = 'relu', padding = 'same'))
##model.add(Conv1D(filters =128, kearnel_size = 3, input_shape = (19, 128), activation = 'relu', padding = 'same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size = 2))
model.add(Conv1D(filters =256, kernel_size = 3, input_shape = (9, 128), activation = 'relu', padding = 'same'))
#model.add(Conv1D(filters =256, kernel_size = 3, input_shape = (9, 256), activation = 'relu', padding = 'same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size = 2))
model.add(Conv1D(filters =512, kernel_size = 3, input_shape = (4, 256), activation = 'relu', padding = 'same'))
#model.add(Conv1D(filters =512, kernel_size = 3, input_shape = (4, 512), activation = 'relu', padding = 'same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size = 2))
model.add(Conv1D(filters = 1024, kernel_size = 3, input_shape = (2, 512), activation = 'relu', padding = 'same'))
model.add(Conv1D(filters = 1024, kernel_size = 3, input_shape = (2, 1028), activation = 'relu', padding = 'same'))
model.add(MaxPooling1D(pool_size = 2))
# Flattening
model.add(Flatten())

model.add(Dense(units = 6, activation = 'softmax'))
# Compiling the CNN
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_6 (Conv1D)           (None, 79, 32)            10112     
                                                                 
 max_pooling1d_6 (MaxPooling  (None, 39, 32)           0         
 1D)                                                             
                                                                 
 conv1d_7 (Conv1D)           (None, 39, 64)            6208      
                                                                 
 conv1d_8 (Conv1D)           (None, 39, 64)            12352     
                                                                 
 batch_normalization_4 (Batc  (None, 39, 64)           256       
 hNormalization)                                                 
                                                                 
 max_pooling1d_7 (MaxPooling  (None, 19, 64)          

In [19]:
history = model.fit(trainX, trainy, validation_split=0.2, epochs=2, verbose=1)
# evaluate the model
evaluate(testy, model.predict(testX))#, show_cm=True)
#evaluate(trainy, model.predict(trainX))#, show_cm=True)


Epoch 1/2
Epoch 2/2
[[1 0 0 0 0 0]
 [1 0 0 0 0 0]
 [0 1 0 0 0 0]
 ...
 [0 0 0 0 1 0]
 [0 0 0 0 1 0]
 [0 0 0 0 1 0]]
[[6.90738335e-02 6.53154373e-01 4.88215350e-02 2.96107922e-02
  4.29033861e-02 1.56435996e-01]
 [1.76301375e-01 3.12826216e-01 6.01986982e-02 6.90569170e-03
  3.82769145e-02 4.05491084e-01]
 [5.47418356e-01 1.83356762e-01 5.75538166e-02 4.39276081e-03
  4.37455140e-02 1.63532868e-01]
 ...
 [1.42905414e-01 5.40908873e-01 5.53859212e-02 2.59473752e-02
  5.79080172e-02 1.76944479e-01]
 [7.84091473e-01 1.16123036e-02 1.09364770e-01 3.32288851e-04
  7.29466137e-03 8.73045027e-02]
 [6.68763638e-01 1.04118735e-01 2.83516310e-02 7.57545419e-03
  8.57784525e-02 1.05412178e-01]]
[0 0 1 4 3 5 1 1 5 1 1 5 4 0 1 5 0 5 1 5 1 2 0 5 5 2 1 5 5 5 5 4 4 5 0 0 5
 1 2 0 1 0 2 5 5 5 5 1 4 2 1 2 5 0 0 0 2 1 5 1 5 0 2 5 0 2 2 5 2 0 1 5 2 4
 5 1 5 1 5 2 1 5 3 4 4 4 2 2 1 1 0 1 3 5 2 5 0 0 4 0 0 5 5 5 5 5 0 3 4 5 2
 1 0 4 0 2 1 2 1 5 4 0 5 5 2 5 0 0 0 2 0 1 4 5 5 1 0 5 0 5 2 1 0 2 1 2 1 4
 3 0 2 5

In [21]:
model.predict(testX)

array([[6.90738335e-02, 6.53154373e-01, 4.88215350e-02, 2.96107922e-02,
        4.29033861e-02, 1.56435996e-01],
       [1.76301375e-01, 3.12826216e-01, 6.01986982e-02, 6.90569170e-03,
        3.82769145e-02, 4.05491084e-01],
       [5.47418356e-01, 1.83356762e-01, 5.75538166e-02, 4.39276081e-03,
        4.37455140e-02, 1.63532868e-01],
       ...,
       [1.42905414e-01, 5.40908873e-01, 5.53859212e-02, 2.59473752e-02,
        5.79080172e-02, 1.76944479e-01],
       [7.84091473e-01, 1.16123036e-02, 1.09364770e-01, 3.32288851e-04,
        7.29466137e-03, 8.73045027e-02],
       [6.68763638e-01, 1.04118735e-01, 2.83516310e-02, 7.57545419e-03,
        8.57784525e-02, 1.05412178e-01]], dtype=float32)