In [1]:
%config Completer.use_jedi = False

In [2]:
# importing required libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.model_selection import RandomizedSearchCV, LeaveOneOut, GridSearchCV, cross_val_score, train_test_split
from sklearn.feature_selection import VarianceThreshold
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import scikitplot as skplt
from IPython.display import clear_output
from scipy.stats.stats import pearsonr
from matplotlib.colors import ListedColormap
from sklearn import neighbors
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis

# *Data Cleaning*

In [8]:
df = pd.read_csv("finaldf_orderd.csv").drop('Unnamed: 0', axis = 1)
df.head()

Unnamed: 0,Subject_ID,diagadhd,CHILDgender,age,sws_total,gmd_17Networks_1,zfALFF_17Networks_1,zDCwei_17Networks_1,zReHo_17Networks_1,gmd_17Networks_10,...,zDCwei_vCa_R,zReHo_vCa_R,gmd_vmPu_L,zfALFF_vmPu_L,zDCwei_vmPu_L,zReHo_vmPu_L,gmd_vmPu_R,zfALFF_vmPu_R,zDCwei_vmPu_R,zReHo_vmPu_R
0,sub-0015,0,1,13.0,1.513333,1.30494,-0.129129,-0.255912,-0.14468,0.425599,...,-0.164447,-0.464885,0.44339,-0.571002,-0.3035,-0.378953,0.563376,-0.601837,-0.388848,-0.467961
1,sub-0085,0,0,11.5,2.56,0.416967,-0.370917,0.014907,-0.385798,0.401915,...,-0.535982,-0.579071,0.550866,-0.168972,-0.651936,-0.535457,0.428719,-0.439474,-0.705622,-0.601932
2,sub-0134,0,1,13.0,2.346667,1.376817,0.249346,0.287834,-0.0044,0.44698,...,0.226537,-0.06023,0.743458,-0.105006,-0.325386,0.053537,0.5818,-0.22917,-0.227449,-0.026262
3,sub-0152,0,0,11.5,2.066667,0.620056,-0.054428,0.011391,-0.038562,0.456898,...,-0.237428,-0.031055,0.691905,-0.262465,-0.10328,0.214787,0.638877,-0.411452,-0.345689,0.352952
4,sub-0260,1,1,11.5,3.12,0.760185,-0.241427,-0.121562,-0.235275,0.387195,...,0.283735,0.201138,0.350098,-0.357051,-0.141482,0.0632,0.487531,-0.119186,-0.046044,0.38386


In [9]:
X = df.iloc[:, 5:].values
y = df.iloc[:, 1].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 123)
scalar = StandardScaler().fit(X_train)
X_train = scalar.transform(X_train)
X_test = scalar.transform(X_test)

# *Define functions*

In [10]:
def Leave_one_out(Data, model, PrincipalComponent = False, LinearDiscriminant = False):
    X = Data.iloc[:, 2:].values
    #sel = VarianceThreshold()
    #sel.fit_transform(X)
    y = Data.iloc[:, 1].values
    loo = LeaveOneOut()
    #model = RandomForestClassifier()
    scores = []
    preds = []
    actuals = []
    pca = PCA(n_components=10)    
    for train_index, test_index in loo.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        scalar = MinMaxScaler().fit(X_train)
        X_train = scalar.transform(X_train)
        X_test = scalar.transform(X_test)

        if PrincipalComponent == True:
            pca = pca.fit(X_train)
            X_train = pca.transform(X_train)
            X_test = pca.transform(X_test)
        if LinearDiscriminant == True:
            lda = LinearDiscriminantAnalysis()
            lda.fit(X_train, y_train)
            X_train = lda.transform(X_train)
            X_test = lda.transform(X_test)

        model.fit(X_train, y_train)
        preds.append(model.predict(X_test)[0])
        actuals.append(y_test[0])
        scores.append(model.score(X_test, y_test))
    
    return preds, actuals, scores

In [11]:
def Hyper_Tune(clf, param_grid):
    gs = GridSearchCV(estimator=clf, param_grid = param_grid,
                 cv = 3, scoring = 'accuracy', refit = True)
    return gs

# *Leave One Out*

 - LOO with K-NN

In [12]:
clf = neighbors.KNeighborsClassifier()
k_range = list(range(1, 31))
param_grid = dict(n_neighbors=k_range)
clf_KNN = Hyper_Tune(clf, param_grid)
Preds, Actuals, Scores = Leave_one_out(df, clf_KNN, PrincipalComponent=False, LinearDiscriminant=True)
print("KNN-LOO Accuracy: ", np.mean(Scores), "\n")
confusion = confusion_matrix(Actuals, Preds)
print("Confusion Matrix: ")
print(confusion, "\n")
print(classification_report(Actuals, Preds))

KNN-LOO Accuracy:  0.46153846153846156 

Confusion Matrix: 
[[46 25]
 [38  8]] 

              precision    recall  f1-score   support

           0       0.55      0.65      0.59        71
           1       0.24      0.17      0.20        46

    accuracy                           0.46       117
   macro avg       0.40      0.41      0.40       117
weighted avg       0.43      0.46      0.44       117



- LOO with SVM

In [13]:
clf = SVC(random_state=1, probability=True)
param_grid = {'C': [0.01, 0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']}
clf_SVC = Hyper_Tune(clf, param_grid)
Preds, Actuals, Scores = Leave_one_out(df, clf_SVC, PrincipalComponent=False, LinearDiscriminant=True)
print("SVM-LOO Accuracy: ", np.mean(Scores), "\n")
confusion = confusion_matrix(Actuals, Preds)
print("Confusion Matrix: ")
print(confusion, "\n")
print(classification_report(Actuals, Preds))

SVM-LOO Accuracy:  0.5470085470085471 

Confusion Matrix: 
[[59 12]
 [41  5]] 

              precision    recall  f1-score   support

           0       0.59      0.83      0.69        71
           1       0.29      0.11      0.16        46

    accuracy                           0.55       117
   macro avg       0.44      0.47      0.42       117
weighted avg       0.47      0.55      0.48       117



- LOO with Random Forest

In [14]:
clf = RandomForestClassifier()
n_estimators = [int(x) for x in np.linspace(start = 10, stop = 100, num = 91)]
max_features = ['auto', 'sqrt']
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
min_sample_split = [2, 5, 10]
min_sample_leaf = [1, 2, 4]
bootstrap = [True, False]
param_grid = {'n_estimators': n_estimators,
              'max_features': max_features,
              'max_depth': max_depth,
              'min_samples_split': min_sample_split,
              'min_samples_leaf': min_sample_leaf,
              'bootstrap': bootstrap}
clf_RF = Hyper_Tune(clf, param_grid)
Preds, Actuals, Scores = Leave_one_out(df, clf_RF, PrincipalComponent=False, LinearDiscriminant=True)
print("RF-LOO Accuracy: ", np.mean(Scores), "\n")
confusion = confusion_matrix(Actuals, Preds)
print("Confusion Matrix: ")
print(confusion, "\n")
print(classification_report(Actuals, Preds))

KeyboardInterrupt: 

- LOO with MLP

In [417]:
clf_MLP = MLPClassifier(solver='lbfgs', alpha=1e-4, hidden_layer_sizes=(12, 4), random_state=1)
Preds, Actuals, Scores = Leave_one_out(df, clf_MLP, PrincipalComponent=False, LinearDiscriminant=True)
print("MLP-LOO Accuracy: ", np.mean(Scores), "\n")
confusion = confusion_matrix(Actuals, Preds)
print("Confusion Matrix: ")
print(confusion, "\n")
print(classification_report(Actuals, Preds))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


LOO Accuracy:  0.5811965811965812 

Confusion Matrix: 
[[45 26]
 [23 23]] 

              precision    recall  f1-score   support

           0       0.66      0.63      0.65        71
           1       0.47      0.50      0.48        46

    accuracy                           0.58       117
   macro avg       0.57      0.57      0.57       117
weighted avg       0.59      0.58      0.58       117



- Ensemble Voting Model

In [433]:
clf_ensemble = VotingClassifier(
    estimators=[('Random Forest', clf_RF), ('K-NN', clf_KNN), ('SVC', clf_SVC)],
    voting='hard')
Preds, Actuals, Scores = Leave_one_out(df, clf_ensemble, PrincipalComponent=False)
print("LOO Accuracy: ", np.mean(Scores), "\n")
confusion = confusion_matrix(Actuals, Preds)
print("Confusion Matrix: ")
print(confusion, "\n")
print(classification_report(Actuals, Preds))

LOO Accuracy:  0.6324786324786325 

Confusion Matrix: 
[[65  6]
 [37  9]] 

              precision    recall  f1-score   support

           0       0.64      0.92      0.75        71
           1       0.60      0.20      0.30        46

    accuracy                           0.63       117
   macro avg       0.62      0.56      0.52       117
weighted avg       0.62      0.63      0.57       117



# *Neural Networks*
- Multi Layer Perceptron (MLP)

In [401]:
clf_MLP = MLPClassifier(solver='lbfgs', alpha=1e-4, hidden_layer_sizes=(12, 4), random_state=1)
clf_MLP.fit(X_train, y_train)
print("Score on test: ", clf_MLP.score(X_test, y_test), "\n")
confusion = confusion_matrix(y_test, clf_MLP.predict(X_test))
print("Confusion Matrix: ")
print(confusion)
print(classification_report(y_test, clf_MLP.predict(X_test)))

Score on test:  0.875 

Confusion Matrix: 
[[15  3]
 [ 0  6]]
              precision    recall  f1-score   support

           0       1.00      0.83      0.91        18
           1       0.67      1.00      0.80         6

    accuracy                           0.88        24
   macro avg       0.83      0.92      0.85        24
weighted avg       0.92      0.88      0.88        24

