In [1]:
import time
import seaborn as sns
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score,f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score,cross_val_predict
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import  LabelEncoder
import pandas as pd
import numpy as np

data = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Data/Child-Data2017.csv', na_values='?')
data.rename(columns={'Class/ASD': 'class'}, inplace=True)

for column in data.columns:
    if data[column].dtype == type(object):
        le = LabelEncoder()
        data[column] = le.fit_transform(data[column].astype(str))

names = []
models = []

models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('SVM', SVC()))
models.append(('AB', AdaBoostClassifier()))
models.append(('GBM', GradientBoostingClassifier()))
models.append(('RF', RandomForestClassifier()))
models.append(('ET', ExtraTreesClassifier()))

X=data[['A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score', 'A6_Score',
       'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score']]
Y=data['class']

start_time = 0
elapsed_time = 0
confusion=0
pred=0

# Crossvalidation


In [2]:
seed = 7
kfold = KFold(n_splits=10, random_state=seed)
for name, m in models:
    start_time = time.time()
    pred = cross_val_predict(m, X,Y, cv=kfold )
    elapsed_time = time.time() - start_time   
    confusion = metrics.confusion_matrix(Y,pred)
    #[row, column]
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    accuracy = (TP+TN)/(TP+TN+FP+FN)
    specificity = TN / (TN + FP)
    sensitivity = TP / float(FN + TP)
    error = (FP + FN) / float(TP + TN + FP + FN) # equal #error_rate = 1 - accuracy
    F1Score = f1_score(Y, pred, average='binary')
    AUC=metrics.roc_auc_score(Y, pred)
    print(name)
    print('%.4f' % elapsed_time)
    print('%.4f' % float(accuracy *100.0))
    print('%.4f' %float(specificity*100.0))
    print('%.4f' %float(sensitivity*100.0))
    print('%.4f' %float(AUC*100.0))
    print('%.4f' %float(F1Score*100.0))
    print('%.4f' %float(error*100.0))
    print(' =======================')      




LR
0.0711
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
LDA
0.0503
96.9178
94.0397
100.0000
97.0199
96.9072
3.0822
CART
0.0240
93.8356
92.0530
95.7447
93.8988
93.7500
6.1644
NB
0.0265
88.6986
94.0397
82.9787
88.5092
87.6404
11.3014
KNN
0.0446
91.7808
88.0795
95.7447
91.9121
91.8367
8.2192
SVM
0.0376
98.2877
99.3377
97.1631
98.2504
98.2079
1.7123
AB
0.7105
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
GBM
0.6178
93.4932
92.7152
94.3262
93.5207
93.3333
6.5068
RF
1.3076
94.1781
93.3775
95.0355
94.2065
94.0351
5.8219
ET
0.9901
94.5205
94.7020
94.3262
94.5141
94.3262
5.4795


# leave

In [4]:

num_folds = 10
for name, m in models: 
    loocv = LeaveOneOut() 
    start_time=time.time()
    pred = cross_val_predict(m, X,Y, cv=loocv )
    elapsed_time = time.time() - start_time
    confusion = metrics.confusion_matrix(Y,pred)
    #[row, column]
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    accuracy = (TP+TN)/(TP+TN+FP+FN)
    specificity = TN / (TN + FP)
    sensitivity = TP / float(FN + TP)
    F1Score = f1_score(Y, pred, average='binary')
    AUC=metrics.roc_auc_score(Y, pred)
    error = (FP + FN) / float(TP + TN + FP + FN)
    print(name)
    print('%.4f' % elapsed_time)
    print('%.4f' % float(accuracy *100.0))
    print('%.4f' %float(specificity*100.0))
    print('%.4f' %float(sensitivity*100.0))
    print('%.4f' %float(AUC*100.0))
    print('%.4f' %float(F1Score*100.0))
    print('%.4f' %float(error*100.0))
    print(' =======================') 


LR
1.7298
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
LDA
0.8197
97.2603
94.7020
100.0000
97.3510
97.2414
2.7397
CART
0.6599
93.1507
91.3907
95.0355
93.2131
93.0556
6.8493
NB
0.6753
90.4110
95.3642
85.1064
90.2353
89.5522
9.5890
KNN
0.8332
93.4932
90.7285
96.4539
93.5912
93.4708
6.5068
SVM
1.1428
97.9452
98.6755
97.1631
97.9193
97.8571
2.0548
AB
20.2333
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
GBM
17.7480
94.8630
94.0397
95.7447
94.8922
94.7368
5.1370
RF
38.0344
93.8356
93.3775
94.3262
93.8519
93.6620
6.1644
ET
28.7177
93.8356
93.3775
94.3262
93.8519
93.6620
6.1644


# train test split

In [5]:
test_size = 0.3
seed = 7
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)
for name, m in models:
    model = m
    start_time = time.time()
    model.fit(X_train, Y_train)
    pred = model.predict(X_test)
    elapsed_time = time.time() - start_time
    confusion = metrics.confusion_matrix(Y_test,pred)
    #[row, column]
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    accuracy = (TP+TN)/(TP+TN+FP+FN)
    specificity = TN / (TN + FP)
    sensitivity = TP / float(FN + TP)
    F1Score = f1_score(Y_test, pred, average='binary')
    AUC=metrics.roc_auc_score(Y_test, pred)
    class_error = (FP + FN) / float(TP + TN + FP + FN)
    print(name)
    print('%.4f' % elapsed_time)
    print('%.4f' % float(accuracy *100.0))
    print('%.4f' %float(specificity*100.0))
    print('%.4f' %float(sensitivity*100.0))
    print('%.4f' %float(AUC*100.0))
    print('%.4f' %float(F1Score*100.0))
    print('%.4f' %float(class_error*100.0))
    print(' ====')

LR
0.0108
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
 ====
LDA
0.0038
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
 ====
CART
0.0021
87.5000
83.3333
92.5000
87.9167
87.0588
12.5000
 ====
NB
0.0029
88.6364
85.4167
92.5000
88.9583
88.0952
11.3636
 ====
KNN
0.0053
90.9091
83.3333
100.0000
91.6667
90.9091
9.0909
 ====
SVM
0.0034
94.3182
91.6667
97.5000
94.5833
93.9759
5.6818
 ====
AB
0.0786
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
 ====
GBM
0.0635
93.1818
87.5000
100.0000
93.7500
93.0233
6.8182
 ====
RF
0.1380
92.0455
89.5833
95.0000
92.2917
91.5663
7.9545
 ====
ET
0.1034
90.9091
89.5833
92.5000
91.0417
90.2439
9.0909
 ====
