In [5]:
import time
import seaborn as sns
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score,f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score,cross_val_predict
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import  LabelEncoder
import pandas as pd
import numpy as np

# Load the CSV; '?' entries are read as missing values.
data = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Data/Adult-Data2017.csv', na_values='?')
data = data.rename(columns={'Class/ASD': 'class'})

# Integer-encode every non-numeric column.
# The previous test `dtype == type(object)` compared against `type` (not
# `object`) and only matched because NumPy coerces arbitrary classes to the
# object dtype; an explicit dtype check states the intent directly.
# astype(str) also stringifies missing values, so they are encoded as a
# category of their own rather than dropped.
for column in data.columns:
    if not pd.api.types.is_numeric_dtype(data[column]):
        le = LabelEncoder()
        data[column] = le.fit_transform(data[column].astype(str))

# Shared seed for the estimators that use randomness internally, so that
# re-running the notebook yields the same scores.
seed = 7

names = []  # not used by the cells visible here; kept in case another cell reads it

# (short name, estimator) pairs evaluated by every cell below.
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('CART', DecisionTreeClassifier(random_state=seed)),
    ('NB', GaussianNB()),
    ('KNN', KNeighborsClassifier()),
    ('SVM', SVC()),
    ('AB', AdaBoostClassifier(random_state=seed)),
    ('GBM', GradientBoostingClassifier(random_state=seed)),
    ('RF', RandomForestClassifier(random_state=seed)),
    ('ET', ExtraTreesClassifier(random_state=seed)),
]

# Features: the ten A*_Score columns. Target: the renamed 'class' column.
X = data[['A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score', 'A6_Score',
          'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score']]
Y = data['class']

# Placeholders; each evaluation cell overwrites these before reading them.
start_time = 0
elapsed_time = 0
confusion = 0
pred = 0

# 10-fold cross-validation


In [6]:
seed = 7  # not needed by the unshuffled split below; kept for cells that read it

# Unshuffled 10-fold split. random_state is deliberately NOT passed: it only
# has an effect together with shuffle=True, and recent scikit-learn releases
# raise a ValueError when it is set while shuffle is False. Leaving it out
# keeps exactly the same folds the earlier (silently ignored) seed produced.
kfold = KFold(n_splits=10)

for name, m in models:
    # Time the full cross-validated fit + predict for this estimator.
    start_time = time.time()
    pred = cross_val_predict(m, X, Y, cv=kfold)
    elapsed_time = time.time() - start_time

    # Binary confusion matrix is laid out [[TN, FP], [FN, TP]].
    confusion = metrics.confusion_matrix(Y, pred)
    TN, FP, FN, TP = confusion.ravel()
    total = TP + TN + FP + FN

    accuracy = (TP + TN) / total
    specificity = TN / (TN + FP)
    sensitivity = TP / float(FN + TP)
    error = (FP + FN) / float(total)  # same as 1 - accuracy
    F1Score = f1_score(Y, pred, average='binary')
    # NOTE: computed from hard class predictions rather than scores, so this
    # value is the mean of sensitivity and specificity, not a ranking AUC.
    AUC = metrics.roc_auc_score(Y, pred)

    # Output order: name, seconds, accuracy, specificity, sensitivity,
    # AUC, F1, error (all but the time in percent).
    print(name)
    print('%.4f' % elapsed_time)
    print('%.4f' % (accuracy * 100.0))
    print('%.4f' % (specificity * 100.0))
    print('%.4f' % (sensitivity * 100.0))
    print('%.4f' % (AUC * 100.0))
    print('%.4f' % (F1Score * 100.0))
    print('%.4f' % (error * 100.0))
    print(' =======================')




LR
0.0727
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
LDA
0.0407
96.1648
97.0874
93.6508
95.3691
92.9134
3.8352
CART
0.0379
94.3182
95.5340
91.0053
93.2696
89.5833
5.6818
NB
0.0280
96.5909
98.4466
91.5344
94.9905
93.5135
3.4091
KNN
0.0603
96.1648
96.8932
94.1799
95.5365
92.9504
3.8352
SVM
0.0639
98.5795
99.6117
95.7672
97.6894
97.3118
1.4205
AB
0.8006
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
GBM
0.7800
97.4432
98.6408
94.1799
96.4103
95.1872
2.5568
RF
1.3942
96.5909
97.6699
93.6508
95.6603
93.6508
3.4091
ET
1.0891
95.3125
96.5049
92.0635
94.2842
91.3386
4.6875


# Leave-one-out cross-validation

In [7]:

num_folds = 10  # not read by this cell
# One leave-one-out splitter, reused for every estimator.
loocv = LeaveOneOut()

for name, m in models:
    # Wall-clock time of the full leave-one-out fit + predict.
    start_time = time.time()
    pred = cross_val_predict(m, X, Y, cv=loocv)
    elapsed_time = time.time() - start_time

    # Binary confusion matrix is laid out [[TN, FP], [FN, TP]].
    confusion = metrics.confusion_matrix(Y, pred)
    TN, FP, FN, TP = confusion.ravel()
    total = TP + TN + FP + FN

    accuracy = (TP + TN) / total
    specificity = TN / (TN + FP)
    sensitivity = TP / float(FN + TP)
    F1Score = f1_score(Y, pred, average='binary')
    # Computed from hard class predictions, not scores.
    AUC = metrics.roc_auc_score(Y, pred)
    error = (FP + FN) / float(total)

    # Output order: name, seconds, then accuracy, specificity, sensitivity,
    # AUC, F1 and error as percentages.
    print(name)
    for value in (elapsed_time,
                  accuracy * 100.0,
                  specificity * 100.0,
                  sensitivity * 100.0,
                  AUC * 100.0,
                  F1Score * 100.0,
                  error * 100.0):
        print('%.4f' % value)
    print(' =======================')


LR
4.8408
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
LDA
2.4319
96.1648
97.2816
93.1217
95.2016
92.8760
3.8352
CART
1.7931
94.1761
94.9515
92.0635
93.5075
89.4602
5.8239
NB
1.6716
97.0170
98.4466
93.1217
95.7841
94.3700
2.9830
KNN
2.2342
96.5909
97.2816
94.7090
95.9953
93.7173
3.4091
SVM
4.3524
99.2898
99.6117
98.4127
99.0122
98.6737
0.7102
AB
55.2811
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
GBM
57.1263
97.8693
99.0291
94.7090
96.8691
95.9786
2.1307
RF
101.8040
96.7330
97.8641
93.6508
95.7574
93.8992
3.2670
ET
77.7598
96.0227
97.4757
92.0635
94.7696
92.5532
3.9773


# Train/test split (70/30 hold-out)

In [9]:
test_size = 0.3
seed = 7
# Single seeded hold-out split: 30% of the rows go to the test set.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=test_size, random_state=seed
)

for name, m in models:
    model = m
    # Timed section covers fitting on the training part and predicting the test part.
    start_time = time.time()
    pred = model.fit(X_train, Y_train).predict(X_test)
    elapsed_time = time.time() - start_time

    # Binary confusion matrix is laid out [[TN, FP], [FN, TP]].
    confusion = metrics.confusion_matrix(Y_test, pred)
    TN, FP, FN, TP = confusion.ravel()
    total = TP + TN + FP + FN

    accuracy = (TP + TN) / total
    specificity = TN / (TN + FP)
    sensitivity = TP / float(FN + TP)
    F1Score = f1_score(Y_test, pred, average='binary')
    # Computed from hard class predictions, not scores.
    AUC = metrics.roc_auc_score(Y_test, pred)
    class_error = (FP + FN) / float(total)

    # Output order: name, seconds, then accuracy, specificity, sensitivity,
    # AUC, F1 and error as percentages.
    print(name)
    for value in (elapsed_time,
                  accuracy * 100.0,
                  specificity * 100.0,
                  sensitivity * 100.0,
                  AUC * 100.0,
                  F1Score * 100.0,
                  class_error * 100.0):
        print('%.4f' % value)
    print(' ===========================')

LR
0.0128
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
LDA
0.0042
98.5849
98.0519
100.0000
99.0260
97.4790
1.4151
CART
0.0032
91.9811
92.8571
89.6552
91.2562
85.9504
8.0189
NB
0.0039
97.6415
98.7013
94.8276
96.7644
95.6522
2.3585
KNN
0.0098
96.6981
97.4026
94.8276
96.1151
94.0171
3.3019
SVM
0.0056
98.5849
100.0000
94.8276
97.4138
97.3451
1.4151
AB
0.0774
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
GBM
0.0729
98.5849
100.0000
94.8276
97.4138
97.3451
1.4151
RF
0.1450
96.6981
98.0519
93.1034
95.5777
93.9130
3.3019
ET
0.1080
95.7547
98.0519
89.6552
93.8536
92.0354
4.2453
