In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

df= pd.read_csv('C://Users//User//Desktop//MSc Westminster//Dissertation//DataSets//Heart_Disease_Indicators.csv')

In [2]:
from sklearn.model_selection import train_test_split

target_size = 50000

num_class_0 = int(target_size * df['HeartDiseaseorAttack'].value_counts(normalize=True)[0])
num_class_1 = target_size - num_class_0

df_class_0 = df[df['HeartDiseaseorAttack'] == 0]
df_class_1 = df[df['HeartDiseaseorAttack'] == 1]

df_class_0_sampled = df_class_0.sample(n=num_class_0, random_state=15)
df_class_1_sampled = df_class_1.sample(n=num_class_1, random_state=15)

df_sampled = pd.concat([df_class_0_sampled, df_class_1_sampled])

df_sampled = df_sampled.sample(frac=1, random_state=15).reset_index(drop=True)
df= df_sampled
print(df['HeartDiseaseorAttack'].value_counts(normalize=True))

0    0.90582
1    0.09418
Name: HeartDiseaseorAttack, dtype: float64


In [3]:
df.drop_duplicates(inplace= True)
df.shape

(48050, 22)

In [4]:
df= df[df['Diabetes'] != 1].copy()
df.loc[df['Diabetes'] == 2, 'Diabetes'] = 1
print(df['Diabetes'].value_counts())

0    40175
1     6968
Name: Diabetes, dtype: int64


In [5]:
categorical_columns= ['HighBP', 'HighChol', 'CholCheck', 'Smoker', 'Stroke', 'Diabetes', 'PhysActivity',
                      'Fruits', 'Veggies', 'HvyAlcoholConsump', 'AnyHealthcare', 'NoDocbcCost', 'DiffWalk',
                      'Sex', 'Age', 'Education', 'Income'
                     ]
df[categorical_columns]= df[categorical_columns].astype(str)
df= pd.get_dummies(df, columns= categorical_columns, drop_first= True)

In [6]:
X= df.drop(['HeartDiseaseorAttack'], axis= 1)
y= df['HeartDiseaseorAttack']

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X, y, test_size= 0.2, random_state= 15, stratify= y)

continuous_columns = ['BMI', 'GenHlth', 'MentHlth', 'PhysHlth']

from sklearn.preprocessing import StandardScaler
ss= StandardScaler()
X_train= ss.fit_transform(X_train)
X_test= ss.transform(X_test)

In [8]:
from imblearn.combine import SMOTETomek

smt= SMOTETomek(random_state= 15)
X_sm_tl, y_sm_tl= smt.fit_resample(X_train, y_train)

In [9]:
from sklearn.feature_selection import mutual_info_classif

mi_scores= mutual_info_classif(X_sm_tl, y_sm_tl)

In [10]:
feature_names = X.columns
mi_df= pd.DataFrame({'Feature': feature_names,
                     'Mutual Information': mi_scores})

mi_df= mi_df.sort_values(by='Mutual Information', ascending=False)
mi_df.head(10)

Unnamed: 0,Feature,Mutual Information
0,BMI,0.451605
1,GenHlth,0.29859
3,PhysHlth,0.263515
2,MentHlth,0.184449
4,HighBP_1,0.165869
5,HighChol_1,0.160179
7,Smoker_1,0.123589
17,Sex_1,0.116595
11,Fruits_1,0.097016
16,DiffWalk_1,0.087758


In [11]:
k= 10
top_k_features= mi_df.head(k)['Feature'].values
top_k_indices= [feature_names.get_loc(f) for f in top_k_features]

X_sm_tl_selected= X_sm_tl[:, top_k_indices]
X_sm_tl_test_selected= X_test[:, top_k_indices]

--- LogisticRegression ---

In [12]:
from sklearn.metrics import confusion_matrix, classification_report, precision_score, roc_auc_score, accuracy_score
from sklearn.linear_model import LogisticRegression
lr= LogisticRegression(random_state= 15)
lr.fit(X_sm_tl_selected, y_sm_tl)
y_pred_lr= lr.predict(X_sm_tl_test_selected)
y_pred_prob_lr= lr.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_lr, labels= lr.classes_)
print(classification_report(y_test, y_pred_lr, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_lr, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_lr))
print('AUC:', roc_auc_score(y_test, y_pred_prob_lr))

              precision    recall  f1-score   support

           0       0.97      0.73      0.83      8517
           1       0.23      0.78      0.36       912

    accuracy                           0.73      9429
   macro avg       0.60      0.75      0.60      9429
weighted avg       0.90      0.73      0.79      9429

Precision: 0.23467020218760357
Accuracy: 0.7334818114328137
AUC: 0.8183436403766255


--- DecisionTreeClassifier ---

In [13]:
from sklearn.tree import DecisionTreeClassifier
dt= DecisionTreeClassifier(random_state=15)
dt.fit(X_sm_tl_selected, y_sm_tl)
y_pred_dt= dt.predict(X_sm_tl_test_selected)
y_pred_prob_dt= dt.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_dt, labels= dt.classes_)
print(classification_report(y_test, y_pred_dt, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_dt, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_dt))
print('AUC:', roc_auc_score(y_test, y_pred_prob_dt))

              precision    recall  f1-score   support

           0       0.92      0.93      0.93      8517
           1       0.26      0.22      0.24       912

    accuracy                           0.87      9429
   macro avg       0.59      0.58      0.58      9429
weighted avg       0.85      0.87      0.86      9429

Precision: 0.261968085106383
Accuracy: 0.8653091526142751
AUC: 0.5929852755788731


--- Tuned - DecisionTreeClassifier ---

In [14]:
from sklearn.model_selection import GridSearchCV
param_grid= {
             'max_depth': [None, 10, 20, 30, 40, 50],
             'min_samples_split': [2, 10, 20],
             'min_samples_leaf': [1, 5, 10],
             'max_features': [None, 'sqrt', 'log2'],
             'criterion': ['gini', 'entropy']
            }

gs_dt= GridSearchCV(estimator= dt, param_grid= param_grid, cv= 5, scoring= 'precision')
gs_dt.fit(X_sm_tl_selected, y_sm_tl)

print("Best Parameters:", gs_dt.best_params_)
print("Best Precision Score:", gs_dt.best_score_)

Best Parameters: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'min_samples_leaf': 5, 'min_samples_split': 20}
Best Precision Score: 0.9498570401610322


In [15]:
tuned_dt= gs_dt.best_estimator_
y_pred_tuned_dt= tuned_dt.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_dt= tuned_dt.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_dt, labels= tuned_dt.classes_)
print(classification_report(y_test, y_pred_tuned_dt, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_dt, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_dt))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_dt))

              precision    recall  f1-score   support

           0       0.92      0.96      0.94      8517
           1       0.32      0.18      0.23       912

    accuracy                           0.88      9429
   macro avg       0.62      0.57      0.58      9429
weighted avg       0.86      0.88      0.87      9429

Precision: 0.31721470019342357
Accuracy: 0.8832325803372574
AUC: 0.7108278283474331


--- KNeighborsClassifier ---

In [16]:
from sklearn.neighbors import KNeighborsClassifier
knn= KNeighborsClassifier()
knn.fit(X_sm_tl_selected, y_sm_tl)
y_pred_knn= knn.predict(X_sm_tl_test_selected)
y_pred_prob_knn= knn.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_knn, labels= knn.classes_)
print(classification_report(y_test, y_pred_knn, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_knn, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_knn))
print('AUC:', roc_auc_score(y_test, y_pred_prob_knn))

              precision    recall  f1-score   support

           0       0.93      0.88      0.91      8517
           1       0.26      0.39      0.31       912

    accuracy                           0.83      9429
   macro avg       0.60      0.63      0.61      9429
weighted avg       0.87      0.83      0.85      9429

Precision: 0.26112759643916916
Accuracy: 0.8349771980061512
AUC: 0.7152744948699092


--- Tuned - KNeighborsClassifier ---

In [17]:
param_grid= {
             'n_neighbors': np.arange(1,40),
             'weights': ['uniform', 'distance'],
             'metric': ['euclidean', 'manhattan', 'minkowski']
            }

gs_knn= GridSearchCV(estimator= knn, param_grid= param_grid, cv=5, scoring= 'precision')
gs_knn.fit(X_sm_tl_selected, y_sm_tl)
print("Best Parameters:", gs_knn.best_params_)
print("Best Precision Score:", gs_knn.best_score_)

Best Parameters: {'metric': 'manhattan', 'n_neighbors': 2, 'weights': 'uniform'}
Best Precision Score: 0.945683018507563


In [18]:
tuned_knn= gs_knn.best_estimator_
y_pred_tuned_knn= tuned_knn.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_knn= tuned_knn.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_knn, labels= tuned_knn.classes_)
print(classification_report(y_test, y_pred_tuned_knn, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_knn, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_knn))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_knn))

              precision    recall  f1-score   support

           0       0.91      0.96      0.94      8517
           1       0.29      0.15      0.20       912

    accuracy                           0.88      9429
   macro avg       0.60      0.56      0.57      9429
weighted avg       0.85      0.88      0.86      9429

Precision: 0.28661087866108786
Accuracy: 0.881641743557111
AUC: 0.634918952085509


--- GaussianNB ---

In [19]:
from sklearn.naive_bayes import GaussianNB
nb= GaussianNB()
nb.fit(X_sm_tl_selected, y_sm_tl)
y_pred_nb= nb.predict(X_sm_tl_test_selected)
y_pred_prob_nb= nb.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_nb, labels= nb.classes_)
print(classification_report(y_test, y_pred_nb, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_nb, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_nb))
print('AUC:', roc_auc_score(y_test, y_pred_prob_nb))

              precision    recall  f1-score   support

           0       0.96      0.77      0.85      8517
           1       0.24      0.69      0.36       912

    accuracy                           0.76      9429
   macro avg       0.60      0.73      0.61      9429
weighted avg       0.89      0.76      0.81      9429

Precision: 0.24272974020938348
Accuracy: 0.7625410966168205
AUC: 0.8035158398373532


--- SVM ---

In [20]:
from sklearn.svm import SVC
svc= SVC(kernel= 'rbf',probability= True, gamma= 1, random_state=15)
svc.fit(X_sm_tl_selected, y_sm_tl)
y_pred_svc= svc.predict(X_sm_tl_test_selected)
y_pred_prob_svc= svc.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_svc, labels= svc.classes_)
print(classification_report(y_test, y_pred_svc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_svc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_svc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_svc))

              precision    recall  f1-score   support

           0       0.94      0.89      0.91      8517
           1       0.30      0.45      0.36       912

    accuracy                           0.85      9429
   macro avg       0.62      0.67      0.64      9429
weighted avg       0.88      0.85      0.86      9429

Precision: 0.2996336996336996
Accuracy: 0.845264609184431
AUC: 0.7381298419672522


--- Random Forest ---

In [21]:
from sklearn.ensemble import RandomForestClassifier
rf= RandomForestClassifier(random_state=15)
rf.fit(X_sm_tl_selected, y_sm_tl)
y_pred_rf= rf.predict(X_sm_tl_test_selected)
y_pred_prob_rf= rf.predict_proba(X_sm_tl_test_selected)[:,1]

In [22]:
cm= confusion_matrix(y_test, y_pred_rf, labels= rf.classes_)
print(classification_report(y_test, y_pred_rf, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_rf, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_rf))
print('AUC:', roc_auc_score(y_test, y_pred_prob_rf))

              precision    recall  f1-score   support

           0       0.92      0.96      0.94      8517
           1       0.32      0.18      0.23       912

    accuracy                           0.88      9429
   macro avg       0.62      0.57      0.58      9429
weighted avg       0.86      0.88      0.87      9429

Precision: 0.3241106719367589
Accuracy: 0.8843991939760314
AUC: 0.7536391999283168


In [23]:
param_grid= {
             'n_estimators': [100, 200, 300],
             'max_depth': [None, 10, 20, 30],
             'min_samples_split': [2, 5, 10],
             'min_samples_leaf': [1, 2, 4],
            }

gs_tuned_rf= GridSearchCV(estimator= rf, param_grid= param_grid, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_tuned_rf.fit(X_sm_tl_selected, y_sm_tl)
print("Best Parameters:", gs_tuned_rf.best_params_)
print("Best Precision Score:", gs_tuned_rf.best_score_)

Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best Parameters: {'max_depth': 30, 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 200}
Best Precision Score: 0.9683657892224483


In [24]:
tuned_rf= gs_tuned_rf.best_estimator_
y_pred_tuned_rf= tuned_rf.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_rf= tuned_rf.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_rf, labels= tuned_rf.classes_)
print(classification_report(y_test, y_pred_tuned_rf, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_rf, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_rf))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_rf))

              precision    recall  f1-score   support

           0       0.92      0.98      0.94      8517
           1       0.41      0.16      0.23       912

    accuracy                           0.90      9429
   macro avg       0.66      0.57      0.59      9429
weighted avg       0.87      0.90      0.88      9429

Precision: 0.40782122905027934
Accuracy: 0.8962774419344576
AUC: 0.7930528262360728


--- AdaBoost ---

In [25]:
from sklearn.ensemble import AdaBoostClassifier
ada= AdaBoostClassifier(random_state=15)
ada.fit(X_sm_tl_selected, y_sm_tl)
y_pred_ada= ada.predict(X_sm_tl_test_selected)
y_pred_prob_ada= ada.predict_proba(X_sm_tl_test_selected)[:,1]

In [26]:
cm= confusion_matrix(y_test, y_pred_ada, labels= ada.classes_)
print(classification_report(y_test, y_pred_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_ada))

              precision    recall  f1-score   support

           0       0.93      0.91      0.92      8517
           1       0.32      0.39      0.35       912

    accuracy                           0.86      9429
   macro avg       0.63      0.65      0.64      9429
weighted avg       0.87      0.86      0.87      9429

Precision: 0.3235831809872029
Accuracy: 0.8623395906246686
AUC: 0.8141567098002138


In [27]:
param_grid= {'n_estimators': [50, 100, 200]}

gs_ada= GridSearchCV(estimator= ada, param_grid= param_grid, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_ada.fit(X_sm_tl_selected, y_sm_tl)
print("Best Parameters:", gs_ada.best_params_)
print("Best Precision Score:", gs_ada.best_score_)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best Parameters: {'n_estimators': 200}
Best Precision Score: 0.9589106583206636


In [28]:
tuned_ada= gs_ada.best_estimator_
y_pred_tuned_ada= tuned_ada.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_ada= tuned_ada.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_ada, labels= tuned_ada.classes_)
print(classification_report(y_test, y_pred_tuned_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_ada))

              precision    recall  f1-score   support

           0       0.92      0.96      0.94      8517
           1       0.42      0.25      0.31       912

    accuracy                           0.89      9429
   macro avg       0.67      0.60      0.63      9429
weighted avg       0.87      0.89      0.88      9429

Precision: 0.41589648798521256
Accuracy: 0.8936260473008802
AUC: 0.8145411318745379


--- GradientBoosting ---

In [29]:
from sklearn.ensemble import GradientBoostingClassifier
grb= GradientBoostingClassifier(random_state=15)
grb.fit(X_sm_tl_selected, y_sm_tl)
y_pred_grb= grb.predict(X_sm_tl_test_selected)
y_pred_prob_grb= grb.predict_proba(X_sm_tl_test_selected)[:,1]


cm= confusion_matrix(y_test, y_pred_grb, labels= grb.classes_)
print(classification_report(y_test, y_pred_grb, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_grb, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_grb))
print('AUC:', roc_auc_score(y_test, y_pred_prob_grb))

              precision    recall  f1-score   support

           0       0.92      0.95      0.94      8517
           1       0.35      0.26      0.30       912

    accuracy                           0.88      9429
   macro avg       0.64      0.61      0.62      9429
weighted avg       0.87      0.88      0.87      9429

Precision: 0.35346097201767307
Accuracy: 0.8821720224838265
AUC: 0.8176701292976483


In [30]:
param_grid= {'learning_rate': [0.01, 0.1, 0.2]}

gs_grb= GridSearchCV(estimator= grb, param_grid= param_grid, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_grb.fit(X_sm_tl_selected, y_sm_tl)

print("Best Parameters:", gs_grb.best_params_)
print("Best Precision Score:", gs_grb.best_score_)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best Parameters: {'learning_rate': 0.2}
Best Precision Score: 0.9767738148223227


In [31]:
tuned_grb= gs_grb.best_estimator_
y_pred_tuned_grb= tuned_grb.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_grb= tuned_grb.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_grb, labels= tuned_grb.classes_)
print(classification_report(y_test, y_pred_tuned_grb, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_grb, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_grb))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_grb))

              precision    recall  f1-score   support

           0       0.91      0.98      0.95      8517
           1       0.45      0.15      0.22       912

    accuracy                           0.90      9429
   macro avg       0.68      0.56      0.58      9429
weighted avg       0.87      0.90      0.88      9429

Precision: 0.4542372881355932
Accuracy: 0.900413617562838
AUC: 0.8177361093087303


--- XGB ---

In [32]:
from xgboost import XGBClassifier
xgb= XGBClassifier(random_state=15)
xgb.fit(X_sm_tl_selected, y_sm_tl)
y_pred_xgb= xgb.predict(X_sm_tl_test_selected)
y_pred_prob_xgb= xgb.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_xgb, labels= xgb.classes_)
print(classification_report(y_test, y_pred_xgb, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_xgb, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_xgb))
print('AUC:', roc_auc_score(y_test, y_pred_prob_xgb))

              precision    recall  f1-score   support

           0       0.91      0.99      0.95      8517
           1       0.46      0.10      0.16       912

    accuracy                           0.90      9429
   macro avg       0.69      0.54      0.56      9429
weighted avg       0.87      0.90      0.87      9429

Precision: 0.4642857142857143
Accuracy: 0.9017923427722982
AUC: 0.8028120101386493


In [33]:
params_XGBoost= {'learning_rate': [0.01, 0.1, 1.0]}

gs_xgb= GridSearchCV(estimator= xgb, param_grid= params_XGBoost, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_xgb.fit(X_sm_tl_selected, y_sm_tl)

print("Best Parameters:", gs_xgb.best_params_)
print("Best Precision Score:", gs_xgb.best_score_)

tuned_xgb= gs_xgb.best_estimator_
y_pred_tuned_xgb= tuned_xgb.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_xgb= tuned_xgb.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_xgb, labels= tuned_xgb.classes_)
print(classification_report(y_test, y_pred_tuned_xgb, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_xgb, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_xgb))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_xgb))

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best Parameters: {'learning_rate': 0.1}
Best Precision Score: 0.9747480444462729
              precision    recall  f1-score   support

           0       0.92      0.98      0.95      8517
           1       0.43      0.16      0.23       912

    accuracy                           0.90      9429
   macro avg       0.67      0.57      0.59      9429
weighted avg       0.87      0.90      0.88      9429

Precision: 0.43154761904761907
Accuracy: 0.8983985576413194
AUC: 0.8158691002927067


--- LGBM ---

In [34]:
from lightgbm import LGBMClassifier
lgm= LGBMClassifier(random_state=15)
lgm.fit(X_sm_tl_selected, y_sm_tl)
y_pred_lgm= lgm.predict(X_sm_tl_test_selected)
y_pred_prob_lgm= lgm.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_lgm, labels= lgm.classes_)
print(classification_report(y_test, y_pred_lgm, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_lgm, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_lgm))
print('AUC:', roc_auc_score(y_test, y_pred_prob_lgm))

[LightGBM] [Info] Number of positive: 33993, number of negative: 33993
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002802 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 67986, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      8517
           1       0.52      0.11      0.18       912

    accuracy                           0.90      9429
   macro avg       0.72      0.55      0.57      9429
weighted avg       0.87      0.90      0.88      9429

Precision: 0.5177664974619289
Accuracy: 0.9040195142645031
AUC: 0.814555422179377


In [35]:
params_LGB= {'learning_rate': [0.001, 0.01, 0.1, 1.0],
             'num_leaves': [31, 127],
             'reg_alpha': [0.1, 0.5],
             'min_data_in_leaf': [30, 50, 100, 300, 400]}

gs_lgm= GridSearchCV(estimator= lgm, param_grid= params_LGB, cv=5, scoring='precision', n_jobs=-1, verbose=2)
gs_lgm.fit(X_sm_tl_selected, y_sm_tl)

print("Best Parameters:", gs_lgm.best_params_)
print("Best Precision Score:", gs_lgm.best_score_)

tuned_lgm= gs_lgm.best_estimator_
y_pred_tuned_lgm= tuned_lgm.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_lgm= tuned_lgm.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_lgm, labels= tuned_lgm.classes_)
print(classification_report(y_test, y_pred_tuned_lgm, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_lgm, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_lgm))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_lgm))

Fitting 5 folds for each of 80 candidates, totalling 400 fits
[LightGBM] [Info] Number of positive: 33993, number of negative: 33993
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004861 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 67986, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Best Parameters: {'learning_rate': 0.1, 'min_data_in_leaf': 100, 'num_leaves': 31, 'reg_alpha': 0.5}
Best Precision Score: 0.9838356862925568
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      8517
           1       0.53      0.10      0.17       912

    accuracy                           0.90      9429
   macro avg       0.72      0.55      0.56      9429
weighted avg       0.88      0.90      0.87      9429

Precision: 0.5340909090909091


--- CatBoost ---

In [36]:
from catboost import CatBoostClassifier
cat= CatBoostClassifier(random_state=15)
cat.fit(X_sm_tl_selected, y_sm_tl)
y_pred_cat= cat.predict(X_sm_tl_test_selected)
y_pred_prob_cat= cat.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_cat, labels= cat.classes_)
print(classification_report(y_test, y_pred_cat, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_cat, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_cat))
print('AUC:', roc_auc_score(y_test, y_pred_prob_cat))

Learning rate set to 0.062428
0:	learn: 0.6227600	total: 160ms	remaining: 2m 39s
1:	learn: 0.5730601	total: 179ms	remaining: 1m 29s
2:	learn: 0.5255174	total: 200ms	remaining: 1m 6s
3:	learn: 0.4880984	total: 218ms	remaining: 54.3s
4:	learn: 0.4546782	total: 234ms	remaining: 46.6s
5:	learn: 0.4282168	total: 248ms	remaining: 41.2s
6:	learn: 0.4047185	total: 263ms	remaining: 37.3s
7:	learn: 0.3854845	total: 278ms	remaining: 34.5s
8:	learn: 0.3696500	total: 295ms	remaining: 32.5s
9:	learn: 0.3533006	total: 311ms	remaining: 30.8s
10:	learn: 0.3372007	total: 326ms	remaining: 29.3s
11:	learn: 0.3260619	total: 341ms	remaining: 28.1s
12:	learn: 0.3146735	total: 358ms	remaining: 27.2s
13:	learn: 0.3058804	total: 375ms	remaining: 26.4s
14:	learn: 0.2972685	total: 396ms	remaining: 26s
15:	learn: 0.2909119	total: 415ms	remaining: 25.5s
16:	learn: 0.2842765	total: 434ms	remaining: 25.1s
17:	learn: 0.2774650	total: 451ms	remaining: 24.6s
18:	learn: 0.2724301	total: 469ms	remaining: 24.2s
19:	learn: 

171:	learn: 0.1649486	total: 3.04s	remaining: 14.6s
172:	learn: 0.1648877	total: 3.05s	remaining: 14.6s
173:	learn: 0.1647055	total: 3.06s	remaining: 14.5s
174:	learn: 0.1646199	total: 3.08s	remaining: 14.5s
175:	learn: 0.1645492	total: 3.09s	remaining: 14.5s
176:	learn: 0.1644627	total: 3.1s	remaining: 14.4s
177:	learn: 0.1643737	total: 3.11s	remaining: 14.4s
178:	learn: 0.1643124	total: 3.12s	remaining: 14.3s
179:	learn: 0.1642330	total: 3.13s	remaining: 14.3s
180:	learn: 0.1640646	total: 3.15s	remaining: 14.2s
181:	learn: 0.1640162	total: 3.16s	remaining: 14.2s
182:	learn: 0.1639566	total: 3.17s	remaining: 14.1s
183:	learn: 0.1639056	total: 3.18s	remaining: 14.1s
184:	learn: 0.1637117	total: 3.19s	remaining: 14.1s
185:	learn: 0.1636426	total: 3.2s	remaining: 14s
186:	learn: 0.1630712	total: 3.21s	remaining: 14s
187:	learn: 0.1628412	total: 3.23s	remaining: 13.9s
188:	learn: 0.1624779	total: 3.24s	remaining: 13.9s
189:	learn: 0.1624213	total: 3.26s	remaining: 13.9s
190:	learn: 0.1623

332:	learn: 0.1489716	total: 5.02s	remaining: 10.1s
333:	learn: 0.1489394	total: 5.03s	remaining: 10s
334:	learn: 0.1488890	total: 5.04s	remaining: 10s
335:	learn: 0.1488622	total: 5.05s	remaining: 9.99s
336:	learn: 0.1488167	total: 5.07s	remaining: 9.97s
337:	learn: 0.1487834	total: 5.08s	remaining: 9.95s
338:	learn: 0.1487447	total: 5.09s	remaining: 9.92s
339:	learn: 0.1487047	total: 5.1s	remaining: 9.9s
340:	learn: 0.1486772	total: 5.11s	remaining: 9.88s
341:	learn: 0.1486418	total: 5.12s	remaining: 9.86s
342:	learn: 0.1485324	total: 5.13s	remaining: 9.84s
343:	learn: 0.1484280	total: 5.15s	remaining: 9.82s
344:	learn: 0.1483740	total: 5.16s	remaining: 9.79s
345:	learn: 0.1483416	total: 5.17s	remaining: 9.77s
346:	learn: 0.1483029	total: 5.18s	remaining: 9.75s
347:	learn: 0.1482160	total: 5.19s	remaining: 9.73s
348:	learn: 0.1481813	total: 5.21s	remaining: 9.71s
349:	learn: 0.1481515	total: 5.22s	remaining: 9.7s
350:	learn: 0.1481175	total: 5.23s	remaining: 9.67s
351:	learn: 0.14803

495:	learn: 0.1429758	total: 6.99s	remaining: 7.1s
496:	learn: 0.1429528	total: 7s	remaining: 7.09s
497:	learn: 0.1429223	total: 7.02s	remaining: 7.07s
498:	learn: 0.1428576	total: 7.03s	remaining: 7.06s
499:	learn: 0.1428243	total: 7.04s	remaining: 7.04s
500:	learn: 0.1428048	total: 7.05s	remaining: 7.02s
501:	learn: 0.1427818	total: 7.06s	remaining: 7.01s
502:	learn: 0.1427492	total: 7.07s	remaining: 6.99s
503:	learn: 0.1427265	total: 7.08s	remaining: 6.97s
504:	learn: 0.1427128	total: 7.09s	remaining: 6.96s
505:	learn: 0.1426912	total: 7.11s	remaining: 6.94s
506:	learn: 0.1426767	total: 7.12s	remaining: 6.92s
507:	learn: 0.1426565	total: 7.13s	remaining: 6.9s
508:	learn: 0.1426340	total: 7.14s	remaining: 6.89s
509:	learn: 0.1426053	total: 7.15s	remaining: 6.87s
510:	learn: 0.1425365	total: 7.16s	remaining: 6.85s
511:	learn: 0.1424940	total: 7.18s	remaining: 6.84s
512:	learn: 0.1424537	total: 7.19s	remaining: 6.83s
513:	learn: 0.1424369	total: 7.2s	remaining: 6.81s
514:	learn: 0.1424

661:	learn: 0.1386647	total: 8.99s	remaining: 4.59s
662:	learn: 0.1386450	total: 9.01s	remaining: 4.58s
663:	learn: 0.1386302	total: 9.02s	remaining: 4.56s
664:	learn: 0.1386106	total: 9.03s	remaining: 4.55s
665:	learn: 0.1385747	total: 9.04s	remaining: 4.54s
666:	learn: 0.1385553	total: 9.05s	remaining: 4.52s
667:	learn: 0.1385225	total: 9.06s	remaining: 4.5s
668:	learn: 0.1384980	total: 9.08s	remaining: 4.49s
669:	learn: 0.1384742	total: 9.09s	remaining: 4.48s
670:	learn: 0.1384567	total: 9.1s	remaining: 4.46s
671:	learn: 0.1384416	total: 9.11s	remaining: 4.45s
672:	learn: 0.1384111	total: 9.13s	remaining: 4.43s
673:	learn: 0.1383951	total: 9.14s	remaining: 4.42s
674:	learn: 0.1383726	total: 9.15s	remaining: 4.41s
675:	learn: 0.1383565	total: 9.17s	remaining: 4.39s
676:	learn: 0.1383309	total: 9.18s	remaining: 4.38s
677:	learn: 0.1382833	total: 9.2s	remaining: 4.37s
678:	learn: 0.1382627	total: 9.21s	remaining: 4.36s
679:	learn: 0.1382312	total: 9.22s	remaining: 4.34s
680:	learn: 0.1

823:	learn: 0.1350108	total: 11s	remaining: 2.35s
824:	learn: 0.1349898	total: 11s	remaining: 2.33s
825:	learn: 0.1349645	total: 11s	remaining: 2.32s
826:	learn: 0.1349450	total: 11s	remaining: 2.3s
827:	learn: 0.1349282	total: 11s	remaining: 2.29s
828:	learn: 0.1348944	total: 11s	remaining: 2.28s
829:	learn: 0.1348648	total: 11.1s	remaining: 2.27s
830:	learn: 0.1348489	total: 11.1s	remaining: 2.25s
831:	learn: 0.1348348	total: 11.1s	remaining: 2.24s
832:	learn: 0.1347935	total: 11.1s	remaining: 2.23s
833:	learn: 0.1347641	total: 11.1s	remaining: 2.21s
834:	learn: 0.1347443	total: 11.1s	remaining: 2.2s
835:	learn: 0.1347234	total: 11.1s	remaining: 2.19s
836:	learn: 0.1346898	total: 11.2s	remaining: 2.17s
837:	learn: 0.1346681	total: 11.2s	remaining: 2.16s
838:	learn: 0.1346422	total: 11.2s	remaining: 2.15s
839:	learn: 0.1346313	total: 11.2s	remaining: 2.13s
840:	learn: 0.1346159	total: 11.2s	remaining: 2.12s
841:	learn: 0.1345905	total: 11.2s	remaining: 2.11s
842:	learn: 0.1345568	tota

987:	learn: 0.1319848	total: 13s	remaining: 158ms
988:	learn: 0.1319699	total: 13s	remaining: 145ms
989:	learn: 0.1319552	total: 13s	remaining: 131ms
990:	learn: 0.1319366	total: 13s	remaining: 118ms
991:	learn: 0.1319268	total: 13s	remaining: 105ms
992:	learn: 0.1319165	total: 13s	remaining: 92ms
993:	learn: 0.1319032	total: 13.1s	remaining: 78.8ms
994:	learn: 0.1318816	total: 13.1s	remaining: 65.7ms
995:	learn: 0.1318630	total: 13.1s	remaining: 52.5ms
996:	learn: 0.1318555	total: 13.1s	remaining: 39.4ms
997:	learn: 0.1318434	total: 13.1s	remaining: 26.3ms
998:	learn: 0.1318222	total: 13.1s	remaining: 13.1ms
999:	learn: 0.1318012	total: 13.1s	remaining: 0us
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      8517
           1       0.48      0.08      0.14       912

    accuracy                           0.90      9429
   macro avg       0.69      0.54      0.54      9429
weighted avg       0.87      0.90      0.87      9429

Precis

In [37]:
params_CatBoost= {
                  'depth': [3,5,10],
                  'learning_rate' : [0.01,0.1,1],
                  'iterations' : [5,10,50,100]
                 }

gs_cat= GridSearchCV(estimator= cat, param_grid= params_CatBoost, cv=5, scoring='precision', n_jobs=-1, verbose=2)
gs_cat.fit(X_sm_tl_selected, y_sm_tl)

print("Best Parameters:", gs_cat.best_params_)
print("Best Precision Score:", gs_cat.best_score_)

tuned_cat= gs_cat.best_estimator_
y_pred_tuned_cat= tuned_cat.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_cat= tuned_cat.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_cat, labels= tuned_cat.classes_)
print(classification_report(y_test, y_pred_tuned_cat, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_cat, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_cat))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_cat))

Fitting 5 folds for each of 36 candidates, totalling 180 fits
0:	learn: 0.5032783	total: 9.18ms	remaining: 909ms
1:	learn: 0.4105469	total: 19.3ms	remaining: 948ms
2:	learn: 0.3597681	total: 29.4ms	remaining: 950ms
3:	learn: 0.3198424	total: 39.1ms	remaining: 939ms
4:	learn: 0.2912644	total: 45.7ms	remaining: 869ms
5:	learn: 0.2748218	total: 51.4ms	remaining: 805ms
6:	learn: 0.2583488	total: 56.7ms	remaining: 753ms
7:	learn: 0.2467307	total: 61.7ms	remaining: 710ms
8:	learn: 0.2343625	total: 66.7ms	remaining: 674ms
9:	learn: 0.2310910	total: 71.2ms	remaining: 641ms
10:	learn: 0.2284264	total: 76.5ms	remaining: 619ms
11:	learn: 0.2243744	total: 81.4ms	remaining: 597ms
12:	learn: 0.2200056	total: 86.5ms	remaining: 579ms
13:	learn: 0.2154307	total: 91.7ms	remaining: 563ms
14:	learn: 0.2129153	total: 96.4ms	remaining: 546ms
15:	learn: 0.2095322	total: 102ms	remaining: 534ms
16:	learn: 0.2049567	total: 107ms	remaining: 522ms
17:	learn: 0.2017211	total: 112ms	remaining: 509ms
18:	learn: 0.19

--- Visualisation ---

In [38]:
precision_scores= {
                    'Logistic Regression Precision:': precision_score(y_test, y_pred_lr, zero_division= 0),
                    'Decision Tree Precision:': precision_score(y_test, y_pred_dt, zero_division= 0),
                    'Tuned Decision Tree Precision:': precision_score(y_test, y_pred_tuned_dt, zero_division= 0),
                    'KNeighborsClassifier Precision:': precision_score(y_test, y_pred_knn, zero_division= 0),
                    'Tuned KNeighborsClassifier Precision:': precision_score(y_test, y_pred_tuned_knn, zero_division= 0),
                    'GaussianNB Precision:': precision_score(y_test, y_pred_nb, zero_division= 0),
                    'SVM Precision:': precision_score(y_test, y_pred_svc, zero_division= 0),
                    'Random Forest Precision:': precision_score(y_test, y_pred_rf, zero_division= 0),
                    'Tuned Random Forest Precision:': precision_score(y_test, y_pred_tuned_rf, zero_division= 0),
                    'AdaBoost Precision:': precision_score(y_test, y_pred_ada, zero_division= 0),
                    'Tuned AdaBoost Precision:': precision_score(y_test, y_pred_tuned_ada, zero_division= 0),
                    'GradientBoosting Precision:': precision_score(y_test, y_pred_grb, zero_division= 0),
                    'Tuned GradientBoosting Precision:': precision_score(y_test, y_pred_tuned_grb, zero_division= 0),
                    'XGB Precision:': precision_score(y_test, y_pred_xgb, zero_division= 0),
                    'Tuned XGB Precision:': precision_score(y_test, y_pred_tuned_xgb, zero_division= 0),
                    'LGBM Precision:': precision_score(y_test, y_pred_lgm, zero_division= 0),
                    'Tuned LGBM Precision:': precision_score(y_test, y_pred_tuned_lgm, zero_division= 0),
                    'CatBoost Precision:': precision_score(y_test, y_pred_cat, zero_division= 0),
                    'Tuned CatBoost Precision:': precision_score(y_test, y_pred_tuned_cat, zero_division= 0)
                  }

annova_precision= pd.DataFrame(list(precision_scores.items()), columns= ['Model', 'Precision Score'])
annova_precision= annova_precision.sort_values(by= 'Precision Score', ascending=False)
print(annova_precision)

                                    Model  Precision Score
16                  Tuned LGBM Precision:         0.534091
18              Tuned CatBoost Precision:         0.519380
15                        LGBM Precision:         0.517766
17                    CatBoost Precision:         0.477707
13                         XGB Precision:         0.464286
12      Tuned GradientBoosting Precision:         0.454237
14                   Tuned XGB Precision:         0.431548
10              Tuned AdaBoost Precision:         0.415896
8          Tuned Random Forest Precision:         0.407821
11            GradientBoosting Precision:         0.353461
7                Random Forest Precision:         0.324111
9                     AdaBoost Precision:         0.323583
2          Tuned Decision Tree Precision:         0.317215
6                          SVM Precision:         0.299634
4   Tuned KNeighborsClassifier Precision:         0.286611
1                Decision Tree Precision:         0.2619

In [39]:
accuracy_scores= {
                    'Logistic Regression Accuracy:': accuracy_score(y_test, y_pred_lr),
                    'Decision Tree Accuracy:': accuracy_score(y_test, y_pred_dt),
                    'Tuned Decision Tree Accuracy:': accuracy_score(y_test, y_pred_tuned_dt),
                    'KNeighborsClassifier Accuracy:': accuracy_score(y_test, y_pred_knn),
                    'Tuned KNeighborsClassifier Accuracy:': accuracy_score(y_test, y_pred_tuned_knn),
                    'GaussianNB Accuracy:': accuracy_score(y_test, y_pred_nb),
                    'SVM Accuracy:': accuracy_score(y_test, y_pred_svc),
                    'Random Forest Accuracy:': accuracy_score(y_test, y_pred_rf),
                    'Tuned Random Forest Accuracy:': accuracy_score(y_test, y_pred_tuned_rf),
                    'AdaBoost Accuracy:': accuracy_score(y_test, y_pred_ada),
                    'Tuned AdaBoost Accuracy:': accuracy_score(y_test, y_pred_tuned_ada),
                    'GradientBoosting Accuracy:': accuracy_score(y_test, y_pred_grb),
                    'Tuned GradientBoosting Accuracy:': accuracy_score(y_test, y_pred_tuned_grb),
                    'XGB Accuracy:': accuracy_score(y_test, y_pred_xgb),
                    'Tuned XGB Accuracy:': accuracy_score(y_test, y_pred_tuned_xgb),
                    'LGBM Accuracy:': accuracy_score(y_test, y_pred_lgm),
                    'Tuned LGBM Accuracy:': accuracy_score(y_test, y_pred_tuned_lgm),
                    'CatBoost Accuracy:': accuracy_score(y_test, y_pred_cat),
                    'Tuned CatBoost Accuracy:': accuracy_score(y_test, y_pred_tuned_cat)
                  }

annova_accuracy= pd.DataFrame(list(accuracy_scores.items()), columns= ['Model', 'Accuracy Score'])
annova_accuracy= annova_accuracy.sort_values(by= 'Accuracy Score', ascending=False)
print(annova_accuracy)

                                   Model  Accuracy Score
16                  Tuned LGBM Accuracy:        0.904550
15                        LGBM Accuracy:        0.904020
18              Tuned CatBoost Accuracy:        0.903807
17                    CatBoost Accuracy:        0.902535
13                         XGB Accuracy:        0.901792
12      Tuned GradientBoosting Accuracy:        0.900414
14                   Tuned XGB Accuracy:        0.898399
8          Tuned Random Forest Accuracy:        0.896277
10              Tuned AdaBoost Accuracy:        0.893626
7                Random Forest Accuracy:        0.884399
2          Tuned Decision Tree Accuracy:        0.883233
11            GradientBoosting Accuracy:        0.882172
4   Tuned KNeighborsClassifier Accuracy:        0.881642
1                Decision Tree Accuracy:        0.865309
9                     AdaBoost Accuracy:        0.862340
6                          SVM Accuracy:        0.845265
3         KNeighborsClassifier 

In [40]:
auc_scores= {
                    'Logistic Regression AUC:': roc_auc_score(y_test, y_pred_prob_lr),
                    'Decision Tree AUC:': roc_auc_score(y_test, y_pred_prob_dt),
                    'Tuned Decision Tree AUC:': roc_auc_score(y_test, y_pred_prob_tuned_dt),
                    'KNeighborsClassifier AUC:': roc_auc_score(y_test, y_pred_prob_knn),
                    'Tuned KNeighborsClassifier AUC:': roc_auc_score(y_test, y_pred_prob_tuned_knn),
                    'GaussianNB AUC:': roc_auc_score(y_test, y_pred_prob_nb),
                    'SVM AUC:': roc_auc_score(y_test, y_pred_prob_svc),
                    'Random Forest AUC:': roc_auc_score(y_test, y_pred_prob_rf),
                    'Tuned Random Forest AUC:': roc_auc_score(y_test, y_pred_prob_tuned_rf),
                    'AdaBoost AUC:': roc_auc_score(y_test, y_pred_prob_ada),
                    'Tuned AdaBoost AUC:': roc_auc_score(y_test, y_pred_prob_tuned_ada),
                    'GradientBoosting AUC:': roc_auc_score(y_test, y_pred_prob_grb),
                    'Tuned GradientBoosting AUC:': roc_auc_score(y_test, y_pred_prob_tuned_grb),
                    'XGB AUC:': roc_auc_score(y_test, y_pred_prob_xgb),
                    'Tuned XGB AUC:': roc_auc_score(y_test, y_pred_prob_tuned_xgb),
                    'LGBM AUC:': roc_auc_score(y_test, y_pred_prob_lgm),
                    'Tuned LGBM AUC:': roc_auc_score(y_test, y_pred_prob_tuned_lgm),
                    'CatBoost AUC:': roc_auc_score(y_test, y_pred_prob_cat),
                    'Tuned CatBoost AUC:': roc_auc_score(y_test, y_pred_prob_tuned_cat)
                  }

annova_auc= pd.DataFrame(list(auc_scores.items()), columns= ['Model', 'AUC Score'])
annova_auc= annova_auc.sort_values(by= 'AUC Score', ascending=False)
print(annova_auc)

                              Model  AUC Score
0          Logistic Regression AUC:   0.818344
12      Tuned GradientBoosting AUC:   0.817736
11            GradientBoosting AUC:   0.817670
14                   Tuned XGB AUC:   0.815869
16                  Tuned LGBM AUC:   0.815580
18              Tuned CatBoost AUC:   0.814946
15                        LGBM AUC:   0.814555
10              Tuned AdaBoost AUC:   0.814541
9                     AdaBoost AUC:   0.814157
17                    CatBoost AUC:   0.808594
5                   GaussianNB AUC:   0.803516
13                         XGB AUC:   0.802812
8          Tuned Random Forest AUC:   0.793053
7                Random Forest AUC:   0.753639
6                          SVM AUC:   0.738130
3         KNeighborsClassifier AUC:   0.715274
2          Tuned Decision Tree AUC:   0.710828
4   Tuned KNeighborsClassifier AUC:   0.634919
1                Decision Tree AUC:   0.592985
