In [17]:
import csv
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN

In [18]:
score={'AUC':'roc_auc', 
           'RECALL':'recall',
           'PRECISION':'precision',
           'F1':'f1'}

In [19]:
X_train = pd.read_csv("data/X_all_train_wo_OS.csv", low_memory=False, index_col = 0).iloc[:, 2:]
y_train = pd.read_csv("data/y_train_wo_OS.csv", low_memory=False, index_col = 0).squeeze()
X_test = pd.read_csv("data/X_all_test.csv", low_memory=False, index_col = 0).iloc[:, 2:]
y_test = pd.read_csv("data/y_test.csv", low_memory=False, index_col = 0).squeeze()

In [20]:
LogReg = Pipeline([
            ('sampling', RandomOverSampler()),
            ('classification', LogisticRegression(solver='lbfgs', random_state=0))
            ])
LogReg_para = {}
RandF = Pipeline([
            ('sampling', RandomOverSampler()),
            ('classification', RandomForestClassifier(random_state=0))
            ])
RandF_para = {'classification__n_estimators':[20, 50, 100, 200, 400, 800], 'classification__max_depth':[2, 5, 10, 20]}
AdaBoost = Pipeline([
            ('sampling', RandomOverSampler()),
            ('classification', AdaBoostClassifier(random_state=0))
            ])
AdaBoost_para = {'classification__n_estimators':[20, 50, 100, 200, 400, 800]}
SVM = Pipeline([
            ('sampling', RandomOverSampler()),
            ('classification', SVC(decision_function_shape='ovr', degree=3, gamma='auto'))
            ]) 
SVM_para = {'classification__C':[0.01, 0.1, 1, 10], 'classification__kernel':('linear', 'rbf')}
NaivBay = Pipeline([
            ('sampling', RandomOverSampler()),
            ('classification', GaussianNB())
            ])
NaivBay_para = {}
Knn = Pipeline([
            ('sampling', RandomOverSampler()),
            ('classification', KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski'))
            ])
Knn_para = {'classification__n_neighbors': (10, 15, 25)}

In [21]:
clasifier_names = ["Logistic Regression", "Random Forest", "Adaptive Boosting", "Support Vector Machines", "Naive Bayes", "K Nearest Neighbours"]
classifiers = [LogReg, RandF, AdaBoost, SVM, NaivBay, Knn]
parameters = [LogReg_para, RandF_para, AdaBoost_para, SVM_para, NaivBay_para, Knn_para]

In [None]:
results = list()

for i in range(len(classifiers)):
    clf = GridSearchCV(classifiers[i], parameters[i], cv=5, scoring=score, n_jobs=-1, refit=False, return_train_score=True)
    clf.fit(X_train, y_train)
    results.append([clasifier_names[i], clf.cv_results_])
    print(clasifier_names[i])
    print(clf.cv_results_)

with open("results_ML.csv", 'w', newline='') as myfile:
     wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
     wr.writerow(results)

Logistic Regression
{'mean_fit_time': array([1.3142242]), 'std_fit_time': array([0.23446798]), 'mean_score_time': array([0.07124553]), 'std_score_time': array([0.02531092]), 'params': [{}], 'split0_test_AUC': array([0.5]), 'split1_test_AUC': array([0.5]), 'split2_test_AUC': array([0.51845976]), 'split3_test_AUC': array([0.49027402]), 'split4_test_AUC': array([0.5]), 'mean_test_AUC': array([0.501747]), 'std_test_AUC': array([0.00916629]), 'rank_test_AUC': array([1]), 'split0_train_AUC': array([0.5]), 'split1_train_AUC': array([0.5]), 'split2_train_AUC': array([0.50160914]), 'split3_train_AUC': array([0.50869988]), 'split4_train_AUC': array([0.5]), 'mean_train_AUC': array([0.50206181]), 'std_train_AUC': array([0.00337704]), 'split0_test_RECALL': array([0.]), 'split1_test_RECALL': array([0.]), 'split2_test_RECALL': array([0.0238394]), 'split3_test_RECALL': array([0.01631117]), 'split4_test_RECALL': array([0.]), 'mean_test_RECALL': array([0.00803011]), 'std_test_RECALL': array([0.01011893]

Unnamed: 0,spce,tl_ta,spceeps,spced,own_funds_ta_simple,cash_ta,currentliab_ta,work_cap,currentliab_cash_ta,own_funds_ta_adj,...,tlcf,aodo,drc,recta,rectr,optprcca,mrc1,gp,idit,lse
9272,0.000,0.232133,0.00,0.00,0.767867,0.156278,0.232133,0.471436,0.075855,1.171163e+00,...,131.300,321.225,301.001,11.067,581.500,41.79,10.364,1636.206,76.201,4623.249
71287,0.000,0.219340,0.00,0.00,0.780660,0.013807,0.179274,0.220659,0.165467,1.476982e+00,...,2.655,27.012,7.367,-111.754,21.831,12.28,4.775,423.968,7.222,893.101
18846,319.860,0.732976,2.74,2.70,0.267024,0.002338,0.213544,-0.055724,0.211206,8.766878e-01,...,0.000,0.000,0.000,0.100,674.000,35.66,0.000,956.700,0.000,11720.300
14258,0.000,0.582322,0.00,0.00,0.417678,0.000620,0.005666,0.037960,0.005046,8.312254e+00,...,0.000,0.017,0.056,0.000,0.098,0.00,0.000,0.211,0.053,116.123
57789,0.000,0.905902,0.00,0.00,0.094098,0.000255,0.066786,-0.046030,0.066531,3.131356e+00,...,228.000,150.000,0.000,0.000,247.000,35.38,0.000,3995.000,0.000,15707.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49793,-74.906,0.450922,-0.07,-0.07,0.549078,0.168526,0.113350,0.086122,-0.055175,7.351711e-01,...,328.470,49.781,0.000,-95.733,22.444,5.20,1.915,32.974,18.827,2660.420
67773,-112.749,0.443204,-2.32,-2.32,0.556796,0.000000,0.216217,0.163141,0.216217,1.116651e+00,...,487.275,59.120,0.000,0.000,83.110,5.14,0.490,-13.954,0.612,624.974
54473,-0.186,42.000000,-0.01,-0.01,-41.000000,0.000000,42.000000,-42.000000,42.000000,-3.805794e+18,...,0.000,0.000,0.000,0.000,0.000,0.00,0.000,0.000,0.000,0.008
65500,3.115,0.335716,0.20,0.19,0.664284,0.029918,0.316580,0.315841,0.286662,1.068497e+00,...,0.000,1.217,0.000,0.000,15.793,0.00,2.522,20.927,0.000,66.315


In [16]:
X_train

Unnamed: 0,spce,tl_ta,spceeps,spced,own_funds_ta_simple,cash_ta,currentliab_ta,work_cap,currentliab_cash_ta,own_funds_ta_adj,...,tlcf,aodo,drc,recta,rectr,optprcca,mrc1,gp,idit,lse
60619,-18.812,0.262915,-0.62,-0.62,0.737085,0.086909,0.088316,0.049707,0.001407,4.578155e+00,...,0.00,1.037,0.000,0.000,0.133,5.29,1.525,1.572,0.068,12.795
23786,0.000,0.828120,0.00,0.00,0.171880,0.091313,0.105852,0.029749,0.014539,7.801018e-01,...,0.00,352.352,0.000,-2.102,281.309,21.99,15.904,2439.283,12.304,22518.210
14514,0.000,0.517406,0.00,0.00,0.482594,0.043663,0.160567,0.234692,0.116904,1.363637e+00,...,4.90,7.531,0.000,-20.000,424.185,57.55,36.600,802.627,1.740,3266.515
4647,0.000,0.541500,0.00,0.00,0.458500,0.131009,0.345750,0.174575,0.214741,1.013231e+00,...,87.77,74.166,49.481,0.000,367.093,0.00,12.397,356.117,0.000,1504.679
60351,40.842,0.859328,17.78,17.78,0.140672,0.003121,0.077643,0.020219,0.074522,3.803903e-01,...,191.20,21.496,26.577,0.000,82.284,0.00,5.628,525.662,0.000,1365.772
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6265,0.000,0.739705,0.00,0.00,0.260295,0.006320,0.132586,-0.048293,0.126266,-2.769323e+14,...,0.00,0.000,0.000,-33.000,1777.000,63.49,0.000,5704.000,78.000,69306.000
54886,0.000,0.709393,0.00,0.00,0.290607,0.000000,0.082888,-0.018443,0.082888,1.337164e+14,...,0.00,0.000,0.000,0.000,365.000,0.00,0.000,1280.000,27.000,14043.000
76820,11.856,0.123066,1.15,1.15,0.876934,0.227406,0.123066,0.668216,-0.104340,1.544773e+00,...,0.00,0.202,0.000,2.547,9.305,0.00,0.000,19.502,0.056,53.134
860,0.000,0.256547,0.00,0.00,0.743453,0.234051,0.235361,0.688789,0.001311,1.036744e+00,...,0.00,7.170,10.934,0.000,34.317,43.66,0.532,81.953,0.000,265.511
