# Predictive modelling

Here we first use a support vector machine (SVM) to predict the type of epilepsy syndrome (SDx) from T1-weighted (T1W) MRI and DTI data, and then compare its performance with that of other machine-learning (ML) methods.



In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from lazypredict.Supervised import LazyClassifier


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize


import seaborn as sns
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib.pyplot import figure

from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV


from sklearn.metrics import roc_curve,roc_auc_score, accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Load the imaging table from a local CSV export.
img_dat_nc = pd.read_csv("/Users/seymour/Desktop/SuStaIn_15_June/Andre_sent_21_July/img_dat_nc.csv")
# Rebuild SubjID as Site + SubjID + SDx_dti (all as strings) and drop the last
# two characters of the result.
# NOTE(review): presumably this strips the ".0" of a float-typed SDx_dti — confirm.
img_dat_nc["SubjID"] = (
    img_dat_nc["Site"]
    + img_dat_nc["SubjID"].astype(str)
    + img_dat_nc["SDx_dti"].astype(str)
).str[:-2]

In [None]:
# Column-name stems shared by the "<stem>_FA" and "<stem>_MD" columns.
_dti_stems = [
    'CC', 'CST.L', 'CST.R', 'EC.L', 'EC.R', 'FX.ST.L', 'FX.ST.R',
    'PTR.L', 'PTR.R', 'SLF.L', 'SLF.R', 'SS.L', 'SS.R', 'UNC.L', 'UNC.R',
    'IC.L', 'IC.R', 'CR.L', 'CR.R', 'FO.L', 'FO.R', 'CG.L', 'CG.R',
]

# The 26 T1 columns, in the order they are used as model inputs.
t1_feats = [
    'LLatVent', 'RLatVent', 'Lthal', 'Rthal', 'Lcaud', 'Rcaud', 'Lput', 'Rput',
    'Lpal', 'Rpal', 'Lhippo', 'Rhippo', 'Lamyg', 'Ramyg', 'Laccumb', 'Raccumb',
    'L_frontalL_volume', 'L_parietalL_volume', 'L_temporalL_volume',
    'L_occipitalL_volume', 'L_cingulateC_volume',
    'R_frontalL_volume', 'R_parietalL_volume', 'R_temporalL_volume',
    'R_occipitalL_volume', 'R_cingulateC_volume',
]

# All imaging columns: FA columns, then the T1 columns, then MD columns.
t1_fa_md_feats = (
    [stem + '_FA' for stem in _dti_stems]
    + t1_feats
    + [stem + '_MD' for stem in _dti_stems]
)

# The imaging columns followed by the two non-imaging columns.
t1_fa_feats_covs = t1_fa_md_feats + ['RESPONSE', 'SDx_dti']

In [None]:
# Keep only the modelling columns. The explicit .copy() makes img_dat_nc1 an
# independent frame, so the column assignment below does not write to a slice
# of img_dat_nc (which triggers pandas' SettingWithCopyWarning).
img_dat_nc1 = img_dat_nc[t1_fa_feats_covs].copy()
# Recode the target: 3 -> 0 and 4 -> 1. Any other value maps to NaN.
img_dat_nc1["SDx_dti"] = img_dat_nc1["SDx_dti"].map({3: 0, 4: 1})
# Drop every row with a missing value, which also removes the unmapped targets.
img_dat_nc1 = img_dat_nc1.dropna()
# Shapes before and after the column selection and row filtering.
print(img_dat_nc.shape)
print(img_dat_nc1.shape)

In [None]:
# Feature matrix: only the columns listed in t1_feats.
X = img_dat_nc1.loc[:, t1_feats]
# Scale each feature column (axis=0) to unit L2 norm over all rows.
# NOTE(review): the column norms are computed before the train/test split, so
# they include the rows that later become the test set — confirm this is intended.
X_normalized = normalize(X, norm='l2', axis=0)

In [None]:
# Hold out 30% of the rows for testing (70% training); random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X_normalized,
    img_dat_nc1["SDx_dti"],
    test_size=0.3,
    random_state=109,
)

In [None]:
# Number of labels in the training split.
y_train.shape

In [None]:
# Hyperparameter grid for the SVM. gamma is only used by the rbf and sigmoid
# kernels, so the linear kernel gets its own sub-grid; with one flat grid the
# same linear model is refit once per gamma value for no gain.
param_grid = [
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
    {'kernel': ['rbf', 'sigmoid'], 'C': [1, 10, 100, 1000], 'gamma': [0.1, 0.001, 0.0001]},
]
# Create the model: exhaustive search with GridSearchCV's default
# cross-validation and scoring, using all available cores.
grid = GridSearchCV(SVC(), param_grid, n_jobs=-1)
grid.fit(X_train, y_train)

In [None]:
# Predict the test data (hard class labels, used for the accuracy below).
predicted = grid.predict(X_test)
# ROC AUC ranks samples by a continuous score. Feeding it the hard 0/1 labels
# collapses the ROC curve to a single operating point, so use the SVM's signed
# distance to the decision boundary instead.
roc_auc = roc_auc_score(y_test, grid.decision_function(X_test))
print(roc_auc)

In [None]:
# Fraction of test rows whose predicted label matches the true label.
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=predicted))

In [None]:
# Defines and builds the lazyclassifier
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
# Scored on the training rows themselves: an optimistic fit check only, not an
# estimate of how the models generalise.
models_train, predictions_train = clf.fit(X_train, X_train, y_train, y_train)
# Scored on the held-out test split.
models_test, predictions_test = clf.fit(X_train, X_test, y_train, y_test)

# Show the held-out performance, so the table is comparable with the SVM
# metrics above (which are also computed on X_test). models_train stays
# available for inspecting the train/test gap.
models_test
     

In [None]:
# Bar chart of test-split accuracy for every LazyClassifier model. The
# held-out table is plotted (not models_train) so the bars are comparable with
# the SVM test accuracy and are not inflated by models that memorise the
# training rows.
plt.figure(figsize=(10, 5))
sns.set_theme(style="whitegrid")
ax = sns.barplot(x=models_test.index, y="Accuracy", data=models_test)
# Rotate the model names so the x tick labels do not overlap.
plt.xticks(rotation=90)
     

In [None]:
# Number of rows per value of the SDx_dti column, to check class balance.
img_dat_nc1["SDx_dti"].value_counts()

In [None]:
from keras.models import Sequential 
from keras.layers import Dense,Activation,Dropout 
from tensorflow.keras.layers import BatchNormalization
from keras.utils import np_utils
#import keras

from tensorflow.keras.callbacks import TensorBoard
import keras_tuner as kt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Activation
from tensorflow.keras import layers
import keras_tuner
from kerastuner.tuners import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters
import time
import pickle

# Current Unix time in whole seconds, as a string.
# NOTE(review): presumably meant as a per-run log directory name — confirm usage.
LOG_DIR = str(int(time.time()))

In [None]:
#Build a neural network
def model_builder(hp):
    """Build and compile the feed-forward classifier searched by Keras Tuner.

    Architecture: 26 inputs -> Dense(100, sigmoid) -> Dense(units, sigmoid)
    -> Dense(2, softmax).

    Args:
        hp: Keras Tuner hyperparameter container. Two hyperparameters are
            registered on it: ``units`` (32 to 512 in steps of 32) and
            ``learning_rate`` (one of 1e-1, 1e-2, 1e-3, 1e-4).

    Returns:
        A compiled Keras model. Its output has two columns, one probability
        per class, so column 1 is the predicted probability of class 1.
    """
    model = keras.Sequential()
    # Fixed-width first hidden layer; 26 inputs matches the 26 columns in t1_feats.
    # All layers come from the same `keras` namespace to avoid mixing layer
    # classes from two different Keras installations.
    model.add(keras.layers.Dense(100, input_dim=26, activation='sigmoid'))

    # Tune the number of units in the second Dense layer
    # Choose an optimal value between 32-512
    hp_units = hp.Int('units', min_value=32, max_value=512, step=32)
    model.add(keras.layers.Dense(units=hp_units, activation='sigmoid'))
    # Two-way softmax output. The labels are integer class ids (0/1), so the
    # matching loss is sparse categorical cross-entropy; binary cross-entropy
    # on two unbounded outputs does not match either the label shape or the
    # probability range the loss expects.
    model.add(keras.layers.Dense(2, activation='softmax'))

    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.1, 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3, 1e-4])

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss=keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    return model

In [None]:
# Hyperband search over the hyperparameters registered in model_builder.
# The objective is the validation accuracy: the search is run with a
# validation split, and ranking trials by training accuracy would favour
# configurations that merely overfit.
# NOTE(review): if my_dir/intro_to_kt already exists from an earlier run, the
# tuner resumes from it by default — clear it when changing the objective.
tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=50,
                     factor=3,
                     directory='my_dir',
                     project_name='intro_to_kt')


In [None]:
# Stop training once the validation loss has failed to improve for 5 epochs in a row.
stop_early = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
)


In [None]:
# Run the search; each trial holds out 20% of the training data for validation
# and may stop early via the stop_early callback.
tuner.search(X_train, y_train, epochs=50, validation_split=0.2, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

# 'units' is the width of the second dense layer (the first is fixed at 100),
# so the message names that layer explicitly.
print(f"""
The hyperparameter search is complete. The optimal number of units in the tuned (second)
densely-connected layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")


In [None]:
# Build the model with the optimal hyperparameters and train it for 500 epochs,
# holding out 20% of the training data for validation.
model = tuner.hypermodel.build(best_hps)
history = model.fit(
    X_train,
    y_train,
    epochs=500,
    validation_split=0.2,
)

# Best epoch = 1-based position of the highest validation accuracy; ties go to
# the earliest such epoch.
val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print(f'Best epoch: {best_epoch:d}')


In [None]:
# Build a fresh model with the same optimal hyperparameters.
hypermodel = tuner.hypermodel.build(best_hps)

# Retrain the model for exactly best_epoch epochs, again validating on 20% of
# the training data.
hypermodel.fit(
    X_train,
    y_train,
    epochs=best_epoch,
    validation_split=0.2,
)


In [None]:
# Print the layer-by-layer summary of the retrained model.
hypermodel.summary()

In [None]:
# Evaluate the retrained model on the held-out test split. The printed label
# reads the result as [loss, accuracy], matching the single 'accuracy' metric
# the model is compiled with.
eval_result = hypermodel.evaluate(X_test, y_test)
print("[test loss, test accuracy]:", eval_result)


In [None]:
from sklearn.metrics import auc

# Score for class 1: second output column of the retrained neural network.
# (The *_rf names are kept as they are; they hold neural-network results.)
y_pred_rf = hypermodel.predict(X_test)[:, 1]
# ROC curve over all thresholds, then the area under it.
fpr_rf, tpr_rf, thresholds_rf = roc_curve(y_true=y_test, y_score=y_pred_rf)
auc_rf = auc(x=fpr_rf, y=tpr_rf)
auc_rf

In [None]:
# Shape of the held-out feature matrix.
X_test.shape

In [None]:
# Full model output for the test split (this rebinds y_pred_rf to all output
# columns, not just column 1), followed by its shape.
y_pred_rf = hypermodel.predict(X_test)
y_pred_rf.shape