In [2]:
import time
import seaborn as sns
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score,f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score,cross_val_predict
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import  LabelEncoder
import pandas as pd
import numpy as np
data = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Data/Child-Data2018.csv', na_values='?')
data.rename(columns={'Class': 'class'}, inplace=True)

for column in data.columns:
    if data[column].dtype == type(object):
        le = LabelEncoder()
        data[column] = le.fit_transform(data[column].astype(str))

names = []
models = []

models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('SVM', SVC()))
models.append(('AB', AdaBoostClassifier()))
models.append(('GBM', GradientBoostingClassifier()))
models.append(('RF', RandomForestClassifier()))
models.append(('ET', ExtraTreesClassifier()))

X=data[['A1', 'A2', 'A3', 'A4', 'A5', 'A6','A7', 'A8', 'A9', 'A10']]
Y=data['class']

# Crossvalidation


In [3]:
seed = 7
kfold = KFold(n_splits=10, random_state=seed)
for name, m in models:
    start_time = time.time()
    pred = cross_val_predict(m, X,Y, cv=kfold )
    elapsed_time = time.time() - start_time   
    confusion = metrics.confusion_matrix(Y,pred)
    #[row, column]
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    accuracy = (TP+TN)/(TP+TN+FP+FN)
    specificity = TN / (TN + FP)
    sensitivity = TP / float(FN + TP)
    error = (FP + FN) / float(TP + TN + FP + FN) # equal #error_rate = 1 - accuracy
    F1Score = f1_score(Y, pred, average='binary')
    AUC=metrics.roc_auc_score(Y, pred)
    print(name)
    print('%.4f' % elapsed_time)
    print('%.4f' % float(accuracy *100.0))
    print('%.4f' %float(specificity*100.0))
    print('%.4f' %float(sensitivity*100.0))
    print('%.4f' %float(AUC*100.0))
    print('%.4f' %float(F1Score*100.0))
    print('%.4f' %float(error*100.0))
    print(' =======================')      



LR
0.0819
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
LDA
0.0606
95.8743
92.4603
99.2218
95.8411
96.0452
4.1257
CART
0.0287
94.1061
91.2698
96.8872
94.0785
94.3182
5.8939
NB
0.0304
89.3910
93.2540
85.6031
89.4285
89.0688
10.6090
KNN
0.0506
92.5344
88.4921
96.4981
92.4951
92.8839
7.4656
SVM
0.0618
98.6248
98.0159
99.2218
98.6188
98.6460
1.3752
AB
0.8054
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
GBM
0.7463
97.2495
95.6349
98.8327
97.2338
97.3180
2.7505
RF
1.4348
95.2849
92.8571
97.6654
95.2613
95.4373
4.7151
ET
1.1262
94.4990
92.8571
96.1089
94.4830
94.6360
5.5010


# leave  


In [4]:
num_folds = 10
for name, m in models: 
    loocv = LeaveOneOut() 
    start_time=time.time()
    pred = cross_val_predict(m, X,Y, cv=loocv )
    elapsed_time = time.time() - start_time
    confusion = metrics.confusion_matrix(Y,pred)
    #[row, column]
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    accuracy = (TP+TN)/(TP+TN+FP+FN)
    specificity = TN / (TN + FP)
    sensitivity = TP / float(FN + TP)
    F1Score = f1_score(Y, pred, average='binary')
    AUC=metrics.roc_auc_score(Y, pred)
    error = (FP + FN) / float(TP + TN + FP + FN)
    print(name)
    print('%.4f' % elapsed_time)
    print('%.4f' % float(accuracy *100.0))
    print('%.4f' %float(specificity*100.0))
    print('%.4f' %float(sensitivity*100.0))
    print('%.4f' %float(AUC*100.0))
    print('%.4f' %float(F1Score*100.0))
    print('%.4f' %float(error*100.0))
    print(' =======================') 

LR
3.6998
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
LDA
1.7349
96.0707
92.0635
100.0000
96.0317
96.2547
3.9293
CART
1.3097
93.5167
91.6667
95.3307
93.4987
93.6902
6.4833
NB
1.2725
91.5521
92.8571
90.2724
91.5648
91.5187
8.4479
KNN
1.6295
92.1415
89.6825
94.5525
92.1175
92.3954
7.8585
SVM
3.1237
99.2141
99.2063
99.2218
99.2141
99.2218
0.7859
AB
40.1964
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
GBM
39.2970
97.4460
97.2222
97.6654
97.4438
97.4757
2.5540
RF
79.1777
97.0530
96.0317
98.0545
97.0431
97.1098
2.9470
ET
57.3069
95.6778
94.8413
96.4981
95.6697
95.7529
4.3222


# train test split

In [5]:
test_size = 0.3
seed = 7
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)
for name, m in models:
    model = m
    start_time = time.time()
    model.fit(X_train, Y_train)
    pred = model.predict(X_test)
    elapsed_time = time.time() - start_time
    confusion = metrics.confusion_matrix(Y_test,pred)
    #[row, column]
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    accuracy = (TP+TN)/(TP+TN+FP+FN)
    specificity = TN / (TN + FP)
    sensitivity = TP / float(FN + TP)
    F1Score = f1_score(Y_test, pred, average='binary')
    AUC=metrics.roc_auc_score(Y_test, pred)
    class_error = (FP + FN) / float(TP + TN + FP + FN)
    print(name)
    print('%.4f' % elapsed_time)
    print('%.4f' % float(accuracy *100.0))
    print('%.4f' %float(specificity*100.0))
    print('%.4f' %float(sensitivity*100.0))
    print('%.4f' %float(AUC*100.0))
    print('%.4f' %float(F1Score*100.0))
    print('%.4f' %float(class_error*100.0))
    print(' ============================')

LR
0.0091
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
LDA
0.0042
97.3856
94.9367
100.0000
97.4684
97.3684
2.6144
CART
0.0028
94.1176
93.6709
94.5946
94.1327
93.9597
5.8824
NB
0.0037
94.1176
98.7342
89.1892
93.9617
93.6170
5.8824
KNN
0.0083
92.1569
94.9367
89.1892
92.0629
91.6667
7.8431
SVM
0.0044
96.7320
98.7342
94.5946
96.6644
96.5517
3.2680
AB
0.0764
100.0000
100.0000
100.0000
100.0000
100.0000
0.0000
GBM
0.0741
96.7320
98.7342
94.5946
96.6644
96.5517
3.2680
RF
0.1523
94.1176
96.2025
91.8919
94.0472
93.7931
5.8824
ET
0.1168
94.7712
96.2025
93.2432
94.7229
94.5205
5.2288
