# Library

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import xgboost
import sklearn

from sklearn.svm import SVC
from xgboost import plot_importance
from sklearn.impute import KNNImputer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from numpy import std

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.utils import resample

import scipy as sc

# Import Data

In [None]:
# Mount Google Drive at /content/drive so the spreadsheets read and written
# below are reachable. `google.colab` is only importable inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the two input spreadsheets: the base table and the one whose file name
# marks it as oversampled.
path_original = '/content/drive/My Drive/Colab_Notebooks/covid_detect_clean.xlsx'
path_oversampled = '/content/drive/My Drive/Colab_Notebooks/covid_detect_clean_oversampled.xlsx'

df = pd.read_excel(path_original)
df_ext = pd.read_excel(path_oversampled)

# Preview the first rows of the base table.
df.head()

Unnamed: 0,SESSO,AGE,WBC,Piastrine,Neutrofili,Linfociti,Monociti,Eosinofili,Basofili,PCR,AST,ALT,ALP,GGT,LDH,TARGET
0,M,58,4.2,245.0,3.3,0.6,0.3,0.0,0.0,167.6,25.0,23.0,,,190.0,1
1,M,65,10.1,229.0,,,,,,234.2,57.0,17.0,,,565.0,1
2,M,46,9.4,191.0,7.3,1.3,0.7,0.0,0.0,82.1,55.0,64.0,100.0,107.0,308.0,1
3,M,84,16.6,228.0,,,,,,143.5,38.0,39.0,84.0,26.0,210.0,1
4,M,40,5.2,186.0,4.0,0.7,0.5,0.0,0.0,98.7,50.0,56.0,59.0,81.0,324.0,1


# Hyperparameter grids and base estimators

In [None]:
import lightgbm

# ---------------------------------------------------------------------------
# Base estimators, all seeded with random_state=0.
# ---------------------------------------------------------------------------
# When `lr` is searched with `pg_lr`, the grid's max_iter value replaces the
# 1000 given here.
lr = LogisticRegression(max_iter=1000, random_state=0)
svc = SVC(random_state=0)
dt = DecisionTreeClassifier(random_state=0)
rf = RandomForestClassifier(random_state=0)
xgb = xgboost.XGBClassifier(random_state=0)
lgb = lightgbm.LGBMClassifier(random_state=0)

# ---------------------------------------------------------------------------
# Hyperparameter grids, one list-of-dicts per estimator above.
# ---------------------------------------------------------------------------
pg_lr = [dict(C=[0.1, 1], max_iter=[1000000])]

pg_svc = [dict(C=[1, 2, 3, 4, 10], kernel=['rbf'])]

pg_dt = [dict(max_depth=[3, 4, 5, 6, 7])]

pg_rf = [dict(n_estimators=[25, 30, 50, 70, 100, 150],
              max_depth=[3, 4, 5, 6, 7])]

pg_xgb = [dict(n_estimators=[25, 30, 50, 70, 100, 150],
               max_depth=[3, 4, 5, 6, 7])]

# A wider LightGBM grid that is currently disabled:
#   n_estimators     : range(50, 200, 25)
#   max_depth        : [3, 4, 5, 6, 7]
#   learning_rate    : [0.1, 0.13, 0.15, 0.20, 0.25]
#   subsample        : [0.8, 0.9, 1]
#   colsample_bytree : [0.8, 0.9, 1]
pg_lgb = [dict(n_estimators=range(50, 200, 25),
               max_depth=[3, 4, 5, 6, 7])]

In [None]:
# Labels for the imputation strategies and the model families; they are also
# used as the index/columns of the result tables.
list_imp = ['mean', 'median', 'knn', 'iter', 'without']
list_model = ['LogReg', 'SVC', 'DT', 'RF', 'XGB', 'LGB']

# Model name -> estimator and model name -> grid, in the order of list_model.
# NOTE: `lgb` must still be the LGBMClassifier instance when this cell runs; a
# later cell rebinds `lgb` to the lightgbm module.
dic_model = dict(zip(list_model, (lr, svc, dt, rf, xgb, lgb)))
dic_pg = dict(zip(list_model, (pg_lr, pg_svc, pg_dt, pg_rf, pg_xgb, pg_lgb)))

# Strategy name -> imputer. There is deliberately no entry for 'without'.
dic_imp = dict(
    mean=SimpleImputer(missing_values=np.nan, strategy='mean'),
    median=SimpleImputer(missing_values=np.nan, strategy='median'),
    knn=KNNImputer(n_neighbors=5, weights="uniform"),
    iter=IterativeImputer(random_state=0, max_iter=10000),
)

# Imputation: model inputs (features) and outputs (targets)

In [None]:
# Build the model inputs and targets.
# Features are every column except the first and the last; the target is the
# last column. Missing feature values are filled by an IterativeImputer.
# NOTE(review): the imputer is fitted on all rows before any train/test split,
# so held-out rows influence the imputed values. Confirm this is acceptable.
imp_mean = IterativeImputer(max_iter=10000, random_state=0)  # iterative imputer despite the name

features_original = df.iloc[:, 1:-1].values
features_oversampled = df_ext.iloc[:, 1:-1].values

df_iter = imp_mean.fit_transform(features_original)
df_ext_iter = imp_mean.fit_transform(features_oversampled)  # refits the same imputer on df_ext

df_target = df.iloc[:, -1].values
df_ext_target = df_ext.iloc[:, -1].values

# XGB

In [None]:
def results(model):
    """Print the summary of a fitted search object.

    Args:
        model: object exposing ``best_params_``, ``best_score_`` and
            ``best_estimator_`` (for example a fitted ``GridSearchCV``).

    Prints the best parameters, the best cross-validation score rounded to
    four decimals, and the best estimator, each under its own heading.
    """
    print(f"Best parameters:\n{model.best_params_}")
    print(f"\nBest cross-validation score:\n{model.best_score_:.4f}")
    print(f"\nBest estimator:\n{model.best_estimator_}\n")

In [None]:
def one_param_graph(model, name):
    """Plot the mean cross-validation test score against one searched parameter.

    Args:
        model: fitted search object exposing ``cv_results_``.
        name: column of ``cv_results_`` to put on the x axis, e.g.
            ``'param_max_depth'``.

    Draws into the first panel of a 1x2 subplot grid on the current figure.
    """
    cv_table = pd.DataFrame(model.cv_results_)
    plt.subplot(1, 2, 1)
    plt.plot(cv_table[name], cv_table['mean_test_score'], "b-", label='Accuracy')
    plt.title(name)
    plt.ylabel("Accuracy")
    plt.grid(True)

In [None]:
# XGBoost: 5-fold grid search on the original (imputed) data.
param_grid_xgb = [dict(n_estimators=[25, 30, 50, 70, 100, 150],
                       max_depth=[3, 4, 5, 6, 7])]

xgb_gs = GridSearchCV(
    xgboost.XGBClassifier(random_state=0),
    param_grid_xgb,
    cv=5,
    n_jobs=-1,
)

# Seeded hold-out split; the search only ever sees the training part.
X_train, X_test, y_train, y_test = train_test_split(df_iter, df_target, random_state=0)
xgb_gs.fit(X_train, y_train)

test_score = xgb_gs.score(X_test, y_test)
print(f"\nTest score:\n{test_score}\n")

results(xgb_gs)

confusion = confusion_matrix(y_test, xgb_gs.predict(X_test))
print(f"Confusion matrix:\n{confusion}")

# To plot the score against one searched hyperparameter, pass its
# cv_results_ column name (always prefixed with "param_"), for example:
# one_param_graph(xgb_gs, 'param_learning_rate')


Test score:
0.7571428571428571

Best parameters:
{'max_depth': 4, 'n_estimators': 30}

Best cross-validation score:
0.7899

Best estimator:
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=4,
              min_child_weight=1, missing=None, n_estimators=30, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

Confusion matrix:
[[14 13]
 [ 4 39]]


In [None]:
# XGBoost: 5-fold grid search on the oversampled (imputed) data.
param_grid_xgb = [{'n_estimators' : [25, 30, 50, 70, 100, 150],
            'max_depth' : [3, 4, 5, 6, 7]}]

xgb_gs_ext = GridSearchCV(xgboost.XGBClassifier(random_state=0), param_grid_xgb, cv=5, n_jobs=-1)

# Seed the split so the reported numbers can be reproduced, matching the
# non-oversampled XGB cell.
# NOTE(review): if df_ext contains duplicated (oversampled) rows, copies of one
# row can land on both sides of this split and inflate the test score. Confirm
# how the oversampled file was built.
X_train, X_test, y_train, y_test = train_test_split(df_ext_iter, df_ext_target, random_state=0)

xgb_gs_ext.fit(X_train, y_train)

print("\nTest score:\n{}\n".format(xgb_gs_ext.score(X_test, y_test)))

results(xgb_gs_ext)

confusion = confusion_matrix(y_test, xgb_gs_ext.predict(X_test))
print("Confusion matrix:\n{}".format(confusion))


Test score:
0.8651685393258427

Best parameters:
{'max_depth': 6, 'n_estimators': 70}

Best cross-validation score:
0.8679

Best estimator:
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=None, n_estimators=70, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

Confusion matrix:
[[42  6]
 [ 6 35]]


# RF

In [None]:
# Random forest: 5-fold grid search over the number of trees on the original
# (imputed) data.
param_grid_rf = [{'n_estimators' : [25, 50, 100, 150, 200]}]

rf_gs = GridSearchCV(RandomForestClassifier(random_state=0), param_grid_rf, cv=5, n_jobs=-1)

# Seed the split so the reported numbers can be reproduced, matching the XGB cell.
X_train, X_test, y_train, y_test = train_test_split(df_iter, df_target, random_state=0)

rf_gs.fit(X_train, y_train)

print("\nTest score:\n{}\n".format(rf_gs.score(X_test, y_test)))

results(rf_gs)

confusion = confusion_matrix(y_test, rf_gs.predict(X_test))
print("Confusion matrix:\n{}".format(confusion))

# Optional plot of the score against the number of trees:
# one_param_graph(rf_gs, 'param_n_estimators')


Test score:
0.7714285714285715

Best parameters:
{'n_estimators': 50}

Best cross-validation score:
0.8135

Best estimator:
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=50,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

Confusion matrix:
[[11  9]
 [ 7 43]]


In [None]:
# Random forest: 5-fold grid search over tree count and depth on the
# oversampled (imputed) data.
param_grid_rf = [{'n_estimators' : [25, 50, 100, 150, 200],
            'max_depth' : [5, 6, 7, 8, 9]}]

rf_gs_ext = GridSearchCV(RandomForestClassifier(random_state=0), param_grid_rf, cv=5, n_jobs=-1)

# Seed the split so the reported numbers can be reproduced.
# NOTE(review): if df_ext contains duplicated (oversampled) rows, copies of one
# row can land on both sides of this split and inflate the test score.
X_train, X_test, y_train, y_test = train_test_split(df_ext_iter, df_ext_target, random_state=0)

rf_gs_ext.fit(X_train, y_train)

print("\nTest score:\n{}\n".format(rf_gs_ext.score(X_test, y_test)))

results(rf_gs_ext)

confusion = confusion_matrix(y_test, rf_gs_ext.predict(X_test))
print("Confusion matrix:\n{}".format(confusion))


Test score:
0.8651685393258427

Best parameters:
{'max_depth': 7, 'n_estimators': 50}

Best cross-validation score:
0.8943

Best estimator:
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=7, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=50,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

Confusion matrix:
[[40  6]
 [ 6 37]]


# DT

In [None]:
# Decision tree: 5-fold grid search over tree depth on the original (imputed) data.
# The depth range starts at 1: max_depth=0 is not a valid tree depth, so
# including it only produces failed fits and NaN scores for that candidate.
param_grid_dt = [{ 'max_depth' : range(1, 10)}]

dt_gs = GridSearchCV(DecisionTreeClassifier(random_state=0), param_grid_dt, cv=5, n_jobs=-1)

# Seed the split so the reported numbers can be reproduced, matching the XGB cell.
X_train, X_test, y_train, y_test = train_test_split(df_iter, df_target, random_state=0)

dt_gs.fit(X_train, y_train)

print("\nTest score:\n{}\n".format(dt_gs.score(X_test, y_test)))

results(dt_gs)

confusion = confusion_matrix(y_test, dt_gs.predict(X_test))
print("Confusion matrix:\n{}".format(confusion))

# Optional plot of the score against tree depth:
# one_param_graph(dt_gs, 'param_max_depth')


Test score:
0.6571428571428571

Best parameters:
{'max_depth': 4}

Best cross-validation score:
0.7897

Best estimator:
DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=4, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')

Confusion matrix:
[[10 16]
 [ 8 36]]


In [None]:
# Decision tree: 5-fold grid search over tree depth on the oversampled
# (imputed) data.
# The depth range starts at 1: max_depth=0 is not a valid tree depth, so
# including it only produces failed fits and NaN scores for that candidate.
param_grid_dt = [{ 'max_depth' : range(1, 10)}]

dt_gs_ext = GridSearchCV(DecisionTreeClassifier(random_state=0), param_grid_dt, cv=5, n_jobs=-1)

# Seed the split so the reported numbers can be reproduced.
# NOTE(review): if df_ext contains duplicated (oversampled) rows, copies of one
# row can land on both sides of this split and inflate the test score.
X_train, X_test, y_train, y_test = train_test_split(df_ext_iter, df_ext_target, random_state=0)

dt_gs_ext.fit(X_train, y_train)

print("\nTest score:\n{}\n".format(dt_gs_ext.score(X_test, y_test)))

results(dt_gs_ext)

confusion = confusion_matrix(y_test, dt_gs_ext.predict(X_test))
print("Confusion matrix:\n{}".format(confusion))


Test score:
0.8089887640449438

Best parameters:
{'max_depth': 9}

Best cross-validation score:
0.8340

Best estimator:
DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=9, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')

Confusion matrix:
[[40  9]
 [ 8 32]]


# Light GBM

In [None]:
# LightGBM: 5-fold grid search on the original (imputed) data.
# NOTE: this import rebinds `lgb`, which the param-grid cell set to an
# LGBMClassifier instance, to the lightgbm module. `dic_model` was built before
# this point and still holds the classifier, but re-running the `dic_model`
# cell after this one would store the module instead. The alias is kept
# because the oversampled LightGBM cell uses `lgb.LGBMClassifier`.
import lightgbm as lgb

param_grid_lgb = [{'n_estimators' : range(25, 200, 25),
                   'max_depth' : [3, 4, 5, 6, 7]}]

lgb_gs = GridSearchCV(lgb.LGBMClassifier(random_state=0), param_grid_lgb, cv=5)

# Seed the split so the reported numbers can be reproduced, matching the XGB cell.
X_train, X_test, y_train, y_test = train_test_split(df_iter, df_target, random_state=0)

lgb_gs.fit(X_train, y_train)

print("\nTest score:\n{}\n".format(lgb_gs.score(X_test, y_test)))

results(lgb_gs)

confusion = confusion_matrix(y_test, lgb_gs.predict(X_test))
print("Confusion matrix:\n{}".format(confusion))


Test score:
0.6714285714285714

Best parameters:
{'max_depth': 3, 'n_estimators': 100}

Best cross-validation score:
0.8517

Best estimator:
LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
               importance_type='split', learning_rate=0.1, max_depth=3,
               min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
               n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
               random_state=0, reg_alpha=0.0, reg_lambda=0.0, silent=True,
               subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

Confusion matrix:
[[14 14]
 [ 9 33]]


In [None]:
# LightGBM: 5-fold grid search on the oversampled (imputed) data.
# `lgb` here is the lightgbm module, as bound by `import lightgbm as lgb` in
# the previous LightGBM cell.
param_grid_lgb = [{'n_estimators' : [25, 30, 50, 70, 100, 150],
            'max_depth' : [3, 4, 5, 6, 7]}]

lgb_gs_ext = GridSearchCV(lgb.LGBMClassifier(random_state=0), param_grid_lgb, cv=5, n_jobs=-1)

# Seed the split so the reported numbers can be reproduced.
# NOTE(review): if df_ext contains duplicated (oversampled) rows, copies of one
# row can land on both sides of this split and inflate the test score.
X_train, X_test, y_train, y_test = train_test_split(df_ext_iter, df_ext_target, random_state=0)

lgb_gs_ext.fit(X_train, y_train)

print("\nTest score:\n{}\n".format(lgb_gs_ext.score(X_test, y_test)))

results(lgb_gs_ext)

confusion = confusion_matrix(y_test, lgb_gs_ext.predict(X_test))
print("Confusion matrix:\n{}".format(confusion))


Test score:
0.8876404494382022

Best parameters:
{'max_depth': 5, 'n_estimators': 100}

Best cross-validation score:
0.8679

Best estimator:
LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
               importance_type='split', learning_rate=0.1, max_depth=5,
               min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
               n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
               random_state=0, reg_alpha=0.0, reg_lambda=0.0, silent=True,
               subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

Confusion matrix:
[[44  4]
 [ 6 35]]


# Cross Validation


In [None]:
# Cross-validate every tuned estimator (cross_val_score's default splitting)
# on the original imputed data.
# NOTE(review): the "*_ext" estimators were tuned on the oversampled data but
# are scored here on df_iter / df_target as well. Confirm that is intended
# rather than df_ext_iter / df_ext_target.
def _cv_on_original(search):
    """Return cross_val_score of a search's best estimator on df_iter/df_target."""
    return cross_val_score(search.best_estimator_, df_iter, df_target)


scores_xgb = _cv_on_original(xgb_gs)
scores_rf = _cv_on_original(rf_gs)
scores_dt = _cv_on_original(dt_gs)
scores_lgb = _cv_on_original(lgb_gs)
scores_xgb_ext = _cv_on_original(xgb_gs_ext)
scores_rf_ext = _cv_on_original(rf_gs_ext)
scores_dt_ext = _cv_on_original(dt_gs_ext)
scores_lgb_ext = _cv_on_original(lgb_gs_ext)

# One line per model: per-fold scores followed by their mean.
for label, fold_scores in (
    ('XGB regular ', scores_xgb),
    ('XGB resample', scores_xgb_ext),
    ('RF  regular ', scores_rf),
    ('RF  resample', scores_rf_ext),
    ('DT  regular ', scores_dt),
    ('DT  resample', scores_dt_ext),
    ('LGB  regular ', scores_lgb),
    ('LGB  resample', scores_lgb_ext),
):
    print(f'{label}: {fold_scores} Mean: {np.mean(fold_scores)}')

XGB regular : [0.78571429 0.75       0.76785714 0.71428571 0.76363636] Mean: 0.7562987012987012
XGB resample: [0.76785714 0.75       0.75       0.71428571 0.81818182] Mean: 0.7600649350649351
RF  regular : [0.80357143 0.73214286 0.76785714 0.73214286 0.8       ] Mean: 0.7671428571428571
RF  resample: [0.82142857 0.82142857 0.78571429 0.73214286 0.89090909] Mean: 0.8103246753246752
DT  regular : [0.69642857 0.73214286 0.73214286 0.82142857 0.8       ] Mean: 0.7564285714285713
DT  resample: [0.71428571 0.67857143 0.71428571 0.69642857 0.70909091] Mean: 0.7025324675324676
LGB  regular : [0.80357143 0.76785714 0.75       0.69642857 0.81818182] Mean: 0.7672077922077923
LGB  resample: [0.82142857 0.73214286 0.75       0.71428571 0.81818182] Mean: 0.7672077922077922


# Bootstrap

In [None]:
# Summary tables: one row per imputation strategy, one column per model.
df_result = pd.DataFrame(index=list_imp, columns=list_model)
ci_result = pd.DataFrame(index=list_imp, columns=list_model)

# Per-split metric tables. They start empty; the bootstrap cell adds one
# column per "<imputer> <model>" combination.
df_score, df_recall, df_precision, df_f1, df_specificity = (
    pd.DataFrame() for _ in range(5)
)

In [None]:
# Keep only the rows whose TARGET is 1, then display them without the first column.
is_positive = df['TARGET'] == 1
df_1 = df.loc[is_positive]
df_1.iloc[:, 1:]

Unnamed: 0,AGE,WBC,Piastrine,Neutrofili,Linfociti,Monociti,Eosinofili,Basofili,PCR,AST,ALT,ALP,GGT,LDH,TARGET
0,58,4.2,245.0,3.3,0.6,0.3,0.0,0.0,167.6,25.0,23.0,,,190.0,1
1,65,10.1,229.0,,,,,,234.2,57.0,17.0,,,565.0,1
2,46,9.4,191.0,7.3,1.3,0.7,0.0,0.0,82.1,55.0,64.0,100.0,107.0,308.0,1
3,84,16.6,228.0,,,,,,143.5,38.0,39.0,84.0,26.0,210.0,1
4,40,5.2,186.0,4.0,0.7,0.5,0.0,0.0,98.7,50.0,56.0,59.0,81.0,324.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,65,20.5,117.0,18.9,0.5,0.9,0.2,0.1,200.4,550.0,275.0,,,1061.0,1
173,68,7.5,253.0,5.9,1.0,0.5,0.0,0.0,56.8,36.0,29.0,87.0,48.0,474.0,1
174,62,3.1,200.0,2.2,0.7,0.3,0.0,0.0,104.9,62.0,47.0,82.0,93.0,384.0,1
175,49,10.3,620.0,8.0,1.7,0.4,0.2,0.0,92.8,114.0,196.0,419.0,839.0,605.0,1


In [None]:
# Draw 177 rows with replacement from the TARGET == 0 rows and display them
# without the first column.
# df_0 is defined here so this cell runs on a fresh kernel; it was previously
# only created by a later cell.
df_0 = df[df['TARGET']==0]
X_0 = resample(df_0, replace=True, n_samples=177).iloc[:,1:]
X_0

Unnamed: 0,AGE,WBC,Piastrine,Neutrofili,Linfociti,Monociti,Eosinofili,Basofili,PCR,AST,ALT,ALP,GGT,LDH,TARGET
223,30,6.8,362.0,4.5,1.9,0.4,0.0,0.0,28.1,24.0,12.0,55.0,60.0,357.0,0
257,76,16.6,456.0,,,,,,94.4,33.0,14.0,72.0,40.0,652.0,0
190,29,11.2,296.0,5.8,4.1,0.8,0.4,0.1,2.8,19.0,23.0,69.0,24.0,182.0,0
191,63,7.4,243.0,,,,,,21.9,41.0,46.0,,,,0
227,94,12.1,112.0,,,,,,35.0,23.0,26.0,76.0,32.0,389.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
246,77,10.7,199.0,9.6,0.5,0.4,0.1,0.0,181.5,32.0,18.0,,,199.0,0
252,64,13.8,288.0,,,,,,132.4,247.0,47.0,838.0,804.0,,0
212,76,4.2,148.0,3.5,0.5,0.1,0.0,0.0,116.9,22.0,27.0,51.0,108.0,,0
194,87,10.4,180.0,,,,,,3.2,24.0,15.0,,,,0


In [None]:
# Bootstrap evaluation with an inner grid search ("nested CV").
#
# For every selected (model, imputer) pair, 20 balanced bootstrap samples
# (150 rows per class, drawn with replacement) are built from the original
# data. Each sample is split 5 times (80/20, stratified); the model is tuned by
# grid search on the training part and scored on the test part.
#
# NOTE(review): rows are resampled with replacement *before* the train/test
# split, so copies of the same original row can end up on both sides of a
# split. Confirm this is acceptable for the reported metrics.


def _evaluate_split(model_name, X_train, X_test, y_train, y_test):
    """Tune one model on a training split and score it on the test split.

    Args:
        model_name: key into ``dic_model`` / ``dic_pg``.
        X_train, X_test, y_train, y_test: arrays for one split.

    Returns:
        Tuple ``(score, precision, recall, f1, specificity)``. ``score`` is
        ``GridSearchCV.score`` on the test split; specificity is TN / (TN + FP).
    """
    grid_search = GridSearchCV(dic_model[model_name], dic_pg[model_name], cv=5)
    grid_search.fit(X_train, y_train)
    y_pred = grid_search.predict(X_test)  # predict once and reuse for every metric

    # With labels=[0, 1] the matrix is [[TN, FP], [FN, TP]].
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    negatives = tn + fp
    spec = tn / negatives if negatives else float('nan')

    return (
        grid_search.score(X_test, y_test),
        precision_score(y_test, y_pred),
        recall_score(y_test, y_pred),
        f1_score(y_test, y_pred),
        spec,
    )


sss = StratifiedShuffleSplit(n_splits= 5, test_size=0.2, train_size=0.8, random_state=0)

# One seeded generator drives every bootstrap draw, so a re-run reproduces the
# same samples.
bootstrap_rng = np.random.RandomState(0)

df_0 = df[df['TARGET']==0]
df_1 = df[df['TARGET']==1]

# Models that are fitted on data still containing NaN when no imputer is used.
models_without_imputer = ('LGB', 'XGB')

for model in (list_model[4:-1]):
    for imputer in (list_imp[4:]):
        print(model,imputer)

        # 'without' is only evaluated for the models listed above.
        if imputer == 'without' and model not in models_without_imputer:
            print("skipped: '{}' is not run without imputation".format(model))
            continue

        scores, precision, recall, f1, specificity = [], [], [], [], []
        for i in range(20):
            print(i)
            # Balanced bootstrap sample; drop the first column.
            X_0 = resample(df_0, replace=True, n_samples=150, random_state=bootstrap_rng).iloc[:,1:]
            X_1 = resample(df_1, replace=True, n_samples=150, random_state=bootstrap_rng).iloc[:,1:]
            boot = pd.concat([X_0, X_1]).reset_index(drop=True)
            # Every column but the last is a feature; the last one is the label.
            X = boot.iloc[:,:-1].values
            print(X.shape)
            y = boot.iloc[:,-1].values

            for train_index, test_index in sss.split(X,y):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]

                if imputer != 'without':
                    # Fit the imputer on the training part only, then apply it to both.
                    imp = dic_imp[imputer].fit(X_train)
                    X_train = imp.transform(X_train)
                    X_test  = imp.transform(X_test)

                # Exactly one evaluation per split. The old condition
                # `a and b or c` parsed as `(a and b) or c`, so XGB was
                # evaluated a second time whenever an imputer was selected.
                split_score, split_precision, split_recall, split_f1, split_specificity = \
                    _evaluate_split(model, X_train, X_test, y_train, y_test)
                scores.append(split_score)
                precision.append(split_precision)
                recall.append(split_recall)
                f1.append(split_f1)
                specificity.append(split_specificity)

        column = imputer+' '+model
        try:
            df_result.loc[imputer,model] = sum(scores)/len(scores)
            df_score[column] = scores
            df_precision[column] = precision
            df_recall[column] = recall
            df_f1[column] = f1
            df_specificity[column] = specificity
        except ValueError as err:
            # Raised, for example, when the metric tables already hold columns
            # of a different length from an earlier run; report it, don't hide it.
            print("could not store results for '{}': {}".format(column, err))

# NOTE(review): the file names say "lgb_without", but list_model[4:-1] selects
# only 'XGB'. Check that the prefix matches the model actually evaluated.
df_score.to_excel("/content/drive/MyDrive/Colab_Notebooks/lgb_without_score.xlsx", index=False)
df_result.to_excel("/content/drive/MyDrive/Colab_Notebooks/lgb_without_result.xlsx")
df_precision.to_excel("/content/drive/MyDrive/Colab_Notebooks/lgb_without_precision.xlsx", index=False)
df_recall.to_excel("/content/drive/MyDrive/Colab_Notebooks/lgb_without_recall.xlsx", index=False)
df_f1.to_excel("/content/drive/MyDrive/Colab_Notebooks/lgb_without_f1.xlsx", index=False)
df_specificity.to_excel("/content/drive/MyDrive/Colab_Notebooks/lgb_without_specificity.xlsx", index=False)

In [None]:
# Show the summary table of mean scores (rows: imputation strategy, columns: model).
df_result

In [None]:
# df_score.to_excel("/content/drive/MyDrive/Colab_Notebooks/lgb_mean_score.xlsx", index=False)
# df_result.to_excel("/content/drive/MyDrive/Colab_Notebooks/lgb_mean_result.xlsx")
# df_precision.to_excel("/content/drive/MyDrive/Colab_Notebooks/lgb_mean_precision.xlsx", index=False)
# df_recall.to_excel("/content/drive/MyDrive/Colab_Notebooks/lgb_mean_recall.xlsx", index=False)
# df_f1.to_excel("/content/drive/MyDrive/Colab_Notebooks/lgb_mean_f1.xlsx", index=False)
# df_specificity.to_excel("/content/drive/MyDrive/Colab_Notebooks/lgb_mean_specificity.xlsx", index=False)