In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

df= pd.read_csv('C://Users//User//Desktop//MSc Westminster//Dissertation//DataSets//Heart_Attack_Prediction.csv')

In [2]:
columns=df.columns

for column in columns:
    if df[column].dtype=="int32":
        df[column]=df[column].astype("int16")
    elif df[column].dtype=="float64":
        df[column]=df[column].astype("float16")
    elif df[column].dtype=="object":
        df[column]=df[column].astype("category")

In [3]:
df['Sex']= df['Sex'].map({'Female': 0, 'Male': 1})
df['Sex']= pd.to_numeric(df['Sex'])

df['Diet']= df['Diet'].map({'Healthy': 0, 'Average': 1, 'Unhealthy':2})
df['Diet']= pd.to_numeric(df['Diet'])

df[['HBP', 'LBP']]= df['Blood Pressure'].str.split('/', expand= True)
df['HBP']= pd.to_numeric(df['HBP'])
df['LBP']= pd.to_numeric(df['LBP'])

df['Diabetes'] = df['Diabetes'].map({0: 1, 1: 0})

df['Exercise Hours Per Week']= round(df['Exercise Hours Per Week'], 0)

df['Sedentary Hours Per Day']= round(df['Sedentary Hours Per Day'], 0)

df['Income']= round(df['Income'], 0)

df['BMI']= round(df['BMI'], 0)

df = df.drop(columns=['Patient ID', 'Blood Pressure', 'Country', 'Continent', 'Hemisphere'])

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8763 entries, 0 to 8762
Data columns (total 23 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Age                              8763 non-null   int64  
 1   Sex                              8763 non-null   int64  
 2   Cholesterol                      8763 non-null   int64  
 3   Heart Rate                       8763 non-null   int64  
 4   Diabetes                         8763 non-null   int64  
 5   Family History                   8763 non-null   int64  
 6   Smoking                          8763 non-null   int64  
 7   Obesity                          8763 non-null   int64  
 8   Alcohol Consumption              8763 non-null   int64  
 9   Exercise Hours Per Week          8763 non-null   float16
 10  Diet                             8763 non-null   int64  
 11  Previous Heart Problems          8763 non-null   int64  
 12  Medication Use      

In [5]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age,8763.0,53.70798,21.249509,18.0,35.0,54.0,72.0,90.0
Sex,8763.0,0.6973639,0.459425,0.0,0.0,1.0,1.0,1.0
Cholesterol,8763.0,259.8772,80.863276,120.0,192.0,259.0,330.0,400.0
Heart Rate,8763.0,75.02168,20.550948,40.0,57.0,75.0,93.0,110.0
Diabetes,8763.0,0.347712,0.476271,0.0,0.0,0.0,1.0,1.0
Family History,8763.0,0.4929819,0.499979,0.0,0.0,0.0,1.0,1.0
Smoking,8763.0,0.896839,0.304186,0.0,1.0,1.0,1.0,1.0
Obesity,8763.0,0.5014265,0.500026,0.0,0.0,1.0,1.0,1.0
Alcohol Consumption,8763.0,0.5980828,0.490313,0.0,0.0,1.0,1.0,1.0
Exercise Hours Per Week,8763.0,inf,5.796875,0.0,5.0,10.0,15.0,20.0


In [6]:
X= df.drop(['Heart Attack Risk'], axis= 1)
y= df['Heart Attack Risk']

In [7]:
from collections import Counter
from imblearn.under_sampling import ClusterCentroids
from sklearn.cluster import KMeans

cc= ClusterCentroids(random_state= 15, estimator= KMeans(n_init= 10))

X_cc, y_cc= cc.fit_resample(X, y)
print(f'Cluster Centroids: {Counter(y_cc)}')

Cluster Centroids: Counter({0: 3139, 1: 3139})


In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X, y, test_size=0.2, random_state= 15, stratify= y)

from sklearn.preprocessing import RobustScaler
scaler= RobustScaler()
scaler.fit(X_train)

X_train= scaler.transform(X_train)
X_test= scaler.transform(X_test)

X_cc_train, y_cc_train= cc.fit_resample(X_train, y_train)

--- LogisticRegression ---

In [9]:
from sklearn.metrics import confusion_matrix, classification_report, precision_score, roc_auc_score, accuracy_score
from sklearn.linear_model import LogisticRegression
lr_cc= LogisticRegression(random_state=15)
lr_cc.fit(X_cc_train, y_cc_train)
y_pred_lr_cc= lr_cc.predict(X_test)
y_pred_prob_lr_cc= lr_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_lr_cc, labels= lr_cc.classes_)
print(classification_report(y_test, y_pred_lr_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_lr_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_lr_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_lr_cc))

              precision    recall  f1-score   support

           0       0.64      0.46      0.54      1125
           1       0.36      0.54      0.43       628

    accuracy                           0.49      1753
   macro avg       0.50      0.50      0.48      1753
weighted avg       0.54      0.49      0.50      1753

Precision: 0.3580508474576271
Accuracy: 0.48887621220764405
AUC: 0.4952116065109696


--- DecisionTreeClassifier ---

In [10]:
from sklearn.tree import DecisionTreeClassifier
dt_cc= DecisionTreeClassifier(random_state=15)
dt_cc.fit(X_cc_train, y_cc_train)
y_pred_dt_cc= dt_cc.predict(X_test)
y_pred_prob_dt_cc= dt_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_dt_cc, labels= dt_cc.classes_)
print(classification_report(y_test, y_pred_dt_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_dt_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_dt_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_dt_cc))

              precision    recall  f1-score   support

           0       0.64      0.41      0.50      1125
           1       0.36      0.59      0.45       628

    accuracy                           0.48      1753
   macro avg       0.50      0.50      0.48      1753
weighted avg       0.54      0.48      0.48      1753

Precision: 0.36019417475728155
Accuracy: 0.47746719908727897
AUC: 0.5024932767162067


--- Tuned - DecisionTreeClassifier ---

In [11]:
from sklearn.model_selection import GridSearchCV
param_grid= {
             'max_depth': [None, 10, 20, 30, 40, 50],
             'min_samples_split': [2, 10, 20],
             'min_samples_leaf': [1, 5, 10],
             'max_features': [None, 'sqrt', 'log2'],
             'criterion': ['gini', 'entropy']
            }

gs_dt_cc= GridSearchCV(estimator= dt_cc, param_grid= param_grid, cv= 5, scoring= 'precision')
gs_dt_cc.fit(X_cc_train, y_cc_train)

print("Best Parameters:", gs_dt_cc.best_params_)
print("Best Precision Score:", gs_dt_cc.best_score_)

Best Parameters: {'criterion': 'entropy', 'max_depth': 10, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 20}
Best Precision Score: 0.5928779891370938


In [12]:
tuned_dt_cc= gs_dt_cc.best_estimator_
y_pred_tuned_dt_cc= tuned_dt_cc.predict(X_test)
y_pred_prob_tuned_dt_cc= tuned_dt_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_dt_cc, labels= tuned_dt_cc.classes_)
print(classification_report(y_test, y_pred_tuned_dt_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_dt_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_dt_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_dt_cc))

              precision    recall  f1-score   support

           0       0.66      0.34      0.45      1125
           1       0.37      0.69      0.48       628

    accuracy                           0.46      1753
   macro avg       0.51      0.51      0.46      1753
weighted avg       0.55      0.46      0.46      1753

Precision: 0.36618521665250636
Accuracy: 0.4620650313747861
AUC: 0.49704953998584567


--- KNeighborsClassifier ---

In [13]:
from sklearn.neighbors import KNeighborsClassifier
knn_cc= KNeighborsClassifier()
knn_cc.fit(X_cc_train, y_cc_train)
y_pred_knn_cc= knn_cc.predict(X_test)
y_pred_prob_knn_cc= knn_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_knn_cc, labels= knn_cc.classes_)
print(classification_report(y_test, y_pred_knn_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_knn_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_knn_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_knn_cc))

              precision    recall  f1-score   support

           0       0.65      0.61      0.63      1125
           1       0.37      0.41      0.39       628

    accuracy                           0.54      1753
   macro avg       0.51      0.51      0.51      1753
weighted avg       0.55      0.54      0.55      1753

Precision: 0.3722943722943723
Accuracy: 0.5407872219053051
AUC: 0.5077480537862703


--- Tuned - KNeighborsClassifier ---

In [14]:
param_grid= {
             'n_neighbors': np.arange(1,40),
             'weights': ['uniform', 'distance'],
             'metric': ['euclidean', 'manhattan', 'minkowski']
            }

gs_knn_cc= GridSearchCV(estimator= knn_cc, param_grid= param_grid, cv=5, scoring= 'precision')
gs_knn_cc.fit(X_cc_train, y_cc_train)
print("Best Parameters:", gs_knn_cc.best_params_)
print("Best Precision Score:", gs_knn_cc.best_score_)

Best Parameters: {'metric': 'manhattan', 'n_neighbors': 38, 'weights': 'uniform'}
Best Precision Score: 0.5333682460376644


In [15]:
tuned_knn_cc= gs_knn_cc.best_estimator_
y_pred_tuned_knn_cc= tuned_knn_cc.predict(X_test)
y_pred_prob_tuned_knn_cc= tuned_knn_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_knn_cc, labels= tuned_knn_cc.classes_)
print(classification_report(y_test, y_pred_tuned_knn_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_knn_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_knn_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_knn_cc))

              precision    recall  f1-score   support

           0       0.65      0.49      0.56      1125
           1       0.36      0.53      0.43       628

    accuracy                           0.50      1753
   macro avg       0.51      0.51      0.49      1753
weighted avg       0.55      0.50      0.51      1753

Precision: 0.36443468715697036
Accuracy: 0.5008556759840274
AUC: 0.4990693559801839


--- GaussianNB ---

In [16]:
from sklearn.naive_bayes import GaussianNB
nb_cc= GaussianNB()
nb_cc.fit(X_cc_train, y_cc_train)
y_pred_nb_cc= nb_cc.predict(X_test)
y_pred_prob_nb_cc= nb_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_nb_cc, labels= nb_cc.classes_)
print(classification_report(y_test, y_pred_nb_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_nb_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_nb_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_nb_cc))

              precision    recall  f1-score   support

           0       0.66      0.33      0.44      1125
           1       0.37      0.70      0.48       628

    accuracy                           0.46      1753
   macro avg       0.51      0.51      0.46      1753
weighted avg       0.55      0.46      0.45      1753

Precision: 0.36569037656903763
Accuracy: 0.45864232743867656
AUC: 0.5013021939136588


--- SVM ---

In [17]:
from sklearn.svm import SVC
svc_cc= SVC(kernel= 'rbf',probability= True, gamma= 1, random_state=15)
svc_cc.fit(X_cc_train, y_cc_train)
y_pred_svc_cc= svc_cc.predict(X_test)
y_pred_prob_svc_cc= svc_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_svc_cc, labels= svc_cc.classes_)
print(classification_report(y_test, y_pred_svc_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_svc_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_svc_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_svc_cc))

              precision    recall  f1-score   support

           0       0.66      0.41      0.50      1125
           1       0.37      0.62      0.46       628

    accuracy                           0.48      1753
   macro avg       0.51      0.51      0.48      1753
weighted avg       0.55      0.48      0.49      1753

Precision: 0.36707663197729423
Accuracy: 0.4814603536794067
AUC: 0.5179575371549894


--- Random Forest ---

In [18]:
from sklearn.ensemble import RandomForestClassifier
rf_cc= RandomForestClassifier(random_state=15)
rf_cc.fit(X_cc_train, y_cc_train)
y_pred_rf_cc= rf_cc.predict(X_test)
y_pred_prob_rf_cc= rf_cc.predict_proba(X_test)[:,1]

In [19]:
cm= confusion_matrix(y_test, y_pred_rf_cc, labels= rf_cc.classes_)
print(classification_report(y_test, y_pred_rf_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_rf_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_rf_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_rf_cc))

              precision    recall  f1-score   support

           0       0.67      0.27      0.39      1125
           1       0.37      0.76      0.49       628

    accuracy                           0.44      1753
   macro avg       0.52      0.51      0.44      1753
weighted avg       0.56      0.44      0.42      1753

Precision: 0.3667953667953668
Accuracy: 0.44495151169423847
AUC: 0.5224762915782024


In [20]:
param_grid= {
             'n_estimators': [100, 200, 300],
             'max_depth': [None, 10, 20, 30],
             'min_samples_split': [2, 5, 10],
             'min_samples_leaf': [1, 2, 4],
            }

gs_tuned_rf_cc= GridSearchCV(estimator= rf_cc, param_grid= param_grid, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_tuned_rf_cc.fit(X_cc_train, y_cc_train)
print("Best Parameters:", gs_tuned_rf_cc.best_params_)
print("Best Precision Score:", gs_tuned_rf_cc.best_score_)

Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best Parameters: {'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Best Precision Score: 0.6450986170414204


In [21]:
tuned_rf_cc= gs_tuned_rf_cc.best_estimator_
y_pred_tuned_rf_cc= tuned_rf_cc.predict(X_test)
y_pred_prob_tuned_rf_cc= tuned_rf_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_rf_cc, labels= tuned_rf_cc.classes_)
print(classification_report(y_test, y_pred_tuned_rf_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_rf_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_rf_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_rf_cc))

              precision    recall  f1-score   support

           0       0.65      0.26      0.38      1125
           1       0.36      0.75      0.49       628

    accuracy                           0.44      1753
   macro avg       0.51      0.51      0.43      1753
weighted avg       0.55      0.44      0.42      1753

Precision: 0.3618827160493827
Accuracy: 0.4375356531660011
AUC: 0.5232922859164898


--- AdaBoost ---

In [22]:
from sklearn.ensemble import AdaBoostClassifier
ada_cc= AdaBoostClassifier(random_state=15)
ada_cc.fit(X_cc_train, y_cc_train)
y_pred_ada_cc= ada_cc.predict(X_test)
y_pred_prob_ada_cc= ada_cc.predict_proba(X_test)[:,1]

In [23]:
cm= confusion_matrix(y_test, y_pred_ada_cc, labels= ada_cc.classes_)
print(classification_report(y_test, y_pred_ada_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_ada_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_ada_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_ada_cc))

              precision    recall  f1-score   support

           0       0.65      0.23      0.34      1125
           1       0.36      0.78      0.49       628

    accuracy                           0.43      1753
   macro avg       0.51      0.51      0.42      1753
weighted avg       0.55      0.43      0.40      1753

Precision: 0.36148148148148146
Accuracy: 0.42840844266970907
AUC: 0.5230368011323425


In [24]:
param_grid= {'n_estimators': [50, 100, 200]}

gs_ada_cc= GridSearchCV(estimator= ada_cc, param_grid= param_grid, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_ada_cc.fit(X_cc_train, y_cc_train)
print("Best Parameters:", gs_ada_cc.best_params_)
print("Best Precision Score:", gs_ada_cc.best_score_)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best Parameters: {'n_estimators': 200}
Best Precision Score: 0.6492390840671949


In [25]:
tuned_ada_cc= gs_ada_cc.best_estimator_
y_pred_tuned_ada_cc= tuned_ada_cc.predict(X_test)
y_pred_prob_tuned_ada_cc= tuned_ada_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_ada_cc, labels= tuned_ada_cc.classes_)
print(classification_report(y_test, y_pred_tuned_ada_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_ada_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_ada_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_ada_cc))

              precision    recall  f1-score   support

           0       0.65      0.22      0.32      1125
           1       0.36      0.79      0.50       628

    accuracy                           0.42      1753
   macro avg       0.50      0.50      0.41      1753
weighted avg       0.55      0.42      0.38      1753

Precision: 0.3601449275362319
Accuracy: 0.42156303479749
AUC: 0.49051663128096246


--- GradientBoosting ---

In [26]:
from sklearn.ensemble import GradientBoostingClassifier
grb_cc= GradientBoostingClassifier(random_state=15)
grb_cc.fit(X_cc_train, y_cc_train)
y_pred_grb_cc= grb_cc.predict(X_test)
y_pred_prob_grb_cc= grb_cc.predict_proba(X_test)[:,1]


cm= confusion_matrix(y_test, y_pred_grb_cc, labels= grb_cc.classes_)
print(classification_report(y_test, y_pred_grb_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_grb_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_grb_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_grb_cc))

              precision    recall  f1-score   support

           0       0.65      0.15      0.25      1125
           1       0.36      0.86      0.51       628

    accuracy                           0.40      1753
   macro avg       0.51      0.50      0.38      1753
weighted avg       0.55      0.40      0.34      1753

Precision: 0.3599195710455764
Accuracy: 0.4033086138049059
AUC: 0.5054847841472044


In [27]:
param_grid= {'learning_rate': [0.01, 0.1, 0.2]}

gs_grb_cc= GridSearchCV(estimator= grb_cc, param_grid= param_grid, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_grb_cc.fit(X_cc_train, y_cc_train)

print("Best Parameters:", gs_grb_cc.best_params_)
print("Best Precision Score:", gs_grb_cc.best_score_)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best Parameters: {'learning_rate': 0.2}
Best Precision Score: 0.6727544967658836


In [28]:
tuned_grb_cc= gs_grb_cc.best_estimator_
y_pred_tuned_grb_cc= tuned_grb_cc.predict(X_test)
y_pred_prob_tuned_grb_cc= tuned_grb_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_grb_cc, labels= tuned_grb_cc.classes_)
print(classification_report(y_test, y_pred_tuned_grb_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_grb_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_grb_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_grb_cc))

              precision    recall  f1-score   support

           0       0.64      0.18      0.28      1125
           1       0.36      0.82      0.50       628

    accuracy                           0.41      1753
   macro avg       0.50      0.50      0.39      1753
weighted avg       0.54      0.41      0.36      1753

Precision: 0.35828135828135826
Accuracy: 0.40844266970907017
AUC: 0.5042632696390659


--- XGB ---

In [29]:
from xgboost import XGBClassifier
xgb_cc= XGBClassifier(random_state=15)
xgb_cc.fit(X_cc_train, y_cc_train)
y_pred_xgb_cc= xgb_cc.predict(X_test)
y_pred_prob_xgb_cc= xgb_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_xgb_cc, labels= xgb_cc.classes_)
print(classification_report(y_test, y_pred_xgb_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_xgb_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_xgb_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_xgb_cc))

              precision    recall  f1-score   support

           0       0.67      0.26      0.38      1125
           1       0.37      0.76      0.50       628

    accuracy                           0.44      1753
   macro avg       0.52      0.51      0.44      1753
weighted avg       0.56      0.44      0.42      1753

Precision: 0.36613272311212813
Accuracy: 0.4415288077581289
AUC: 0.5054338287331919


In [30]:
params_XGBoost= {'learning_rate': [0.01, 0.1, 1.0]}

gs_xgb_cc= GridSearchCV(estimator= xgb_cc, param_grid= params_XGBoost, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_xgb_cc.fit(X_cc_train, y_cc_train)

print("Best Parameters:", gs_xgb_cc.best_params_)
print("Best Precision Score:", gs_xgb_cc.best_score_)

tuned_xgb_cc= gs_xgb_cc.best_estimator_
y_pred_tuned_xgb_cc= tuned_xgb_cc.predict(X_test)
y_pred_prob_tuned_xgb_cc= tuned_xgb_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_xgb_cc, labels= tuned_xgb_cc.classes_)
print(classification_report(y_test, y_pred_tuned_xgb_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_xgb_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_xgb_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_xgb_cc))

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best Parameters: {'learning_rate': 0.1}
Best Precision Score: 0.6731939337181202
              precision    recall  f1-score   support

           0       0.69      0.20      0.31      1125
           1       0.37      0.84      0.51       628

    accuracy                           0.43      1753
   macro avg       0.53      0.52      0.41      1753
weighted avg       0.57      0.43      0.38      1753

Precision: 0.3686054660126139
Accuracy: 0.4278379920136908
AUC: 0.5051592356687897


--- LGBM ---

In [31]:
from lightgbm import LGBMClassifier
lgm_cc= LGBMClassifier(random_state=15)
lgm_cc.fit(X_cc_train, y_cc_train)
y_pred_lgm_cc= lgm_cc.predict(X_test)
y_pred_prob_lgm_cc= lgm_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_lgm_cc, labels= lgm_cc.classes_)
print(classification_report(y_test, y_pred_lgm_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_lgm_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_lgm_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_lgm_cc))

[LightGBM] [Info] Number of positive: 2511, number of negative: 2511
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066351 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2300
[LightGBM] [Info] Number of data points in the train set: 5022, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
              precision    recall  f1-score   support

           0       0.66      0.20      0.31      1125
           1       0.36      0.81      0.50       628

    accuracy                           0.42      1753
   macro avg       0.51      0.51      0.41      1753
weighted avg       0.55      0.42      0.38      1753

Precision: 0.36253561253561256
Accuracy: 0.42156303479749
AUC: 0.5129171974522293


In [32]:
params_LGB= {'learning_rate': [0.001, 0.01, 0.1, 1.0],
             'num_leaves': [31, 127],
             'reg_alpha': [0.1, 0.5],
             'min_data_in_leaf': [30, 50, 100, 300, 400]}

gs_lgm_cc= GridSearchCV(estimator= lgm_cc, param_grid= params_LGB, cv=5, scoring='precision', n_jobs=-1, verbose=2)
gs_lgm_cc.fit(X_cc_train, y_cc_train)

print("Best Parameters:", gs_lgm_cc.best_params_)
print("Best Precision Score:", gs_lgm_cc.best_score_)

tuned_lgm_cc= gs_lgm_cc.best_estimator_
y_pred_tuned_lgm_cc= tuned_lgm_cc.predict(X_test)
y_pred_prob_tuned_lgm_cc= tuned_lgm_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_lgm_cc, labels= tuned_lgm_cc.classes_)
print(classification_report(y_test, y_pred_tuned_lgm_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_lgm_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_lgm_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_lgm_cc))

Fitting 5 folds for each of 80 candidates, totalling 400 fits
[LightGBM] [Info] Number of positive: 2511, number of negative: 2511
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007377 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2300
[LightGBM] [Info] Number of data points in the train set: 5022, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Best Parameters: {'learning_rate': 0.1, 'min_data_in_leaf': 30, 'num_leaves': 31, 'reg_alpha': 0.5}
Best Precision Score: 0.6676656204711786
              precision    recall  f1-score   support

           0       0.67      0.23      0.34      1125
           1       0.37      0.80      0.50       628

    accuracy                           0.43      1753
   macro avg       0.52      0.51      0.42      1753
weighted avg       0.56  

--- CatBoost ---

In [33]:
from catboost import CatBoostClassifier
cat_cc= CatBoostClassifier(random_state=15)
cat_cc.fit(X_cc_train, y_cc_train)
y_pred_cat_cc= cat_cc.predict(X_test)
y_pred_prob_cat_cc= cat_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_cat_cc, labels= cat_cc.classes_)
print(classification_report(y_test, y_pred_cat_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_cat_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_cat_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_cat_cc))

Learning rate set to 0.020521
0:	learn: 0.6877416	total: 146ms	remaining: 2m 26s
1:	learn: 0.6817443	total: 156ms	remaining: 1m 17s
2:	learn: 0.6776529	total: 165ms	remaining: 54.9s
3:	learn: 0.6737810	total: 174ms	remaining: 43.3s
4:	learn: 0.6707208	total: 184ms	remaining: 36.6s
5:	learn: 0.6668524	total: 196ms	remaining: 32.4s
6:	learn: 0.6639793	total: 206ms	remaining: 29.2s
7:	learn: 0.6591470	total: 216ms	remaining: 26.7s
8:	learn: 0.6562428	total: 226ms	remaining: 24.9s
9:	learn: 0.6533721	total: 236ms	remaining: 23.4s
10:	learn: 0.6511158	total: 248ms	remaining: 22.3s
11:	learn: 0.6489045	total: 261ms	remaining: 21.5s
12:	learn: 0.6470215	total: 270ms	remaining: 20.5s
13:	learn: 0.6430032	total: 280ms	remaining: 19.7s
14:	learn: 0.6392293	total: 288ms	remaining: 18.9s
15:	learn: 0.6370184	total: 299ms	remaining: 18.4s
16:	learn: 0.6355198	total: 310ms	remaining: 17.9s
17:	learn: 0.6323647	total: 318ms	remaining: 17.4s
18:	learn: 0.6308601	total: 328ms	remaining: 17s
19:	learn: 

174:	learn: 0.5238986	total: 1.87s	remaining: 8.82s
175:	learn: 0.5235790	total: 1.88s	remaining: 8.81s
176:	learn: 0.5233500	total: 1.89s	remaining: 8.8s
177:	learn: 0.5231468	total: 1.9s	remaining: 8.79s
178:	learn: 0.5229352	total: 1.91s	remaining: 8.78s
179:	learn: 0.5226785	total: 1.93s	remaining: 8.79s
180:	learn: 0.5223719	total: 1.94s	remaining: 8.77s
181:	learn: 0.5220804	total: 1.95s	remaining: 8.76s
182:	learn: 0.5217006	total: 1.96s	remaining: 8.74s
183:	learn: 0.5211731	total: 1.97s	remaining: 8.72s
184:	learn: 0.5208262	total: 1.98s	remaining: 8.71s
185:	learn: 0.5205284	total: 1.99s	remaining: 8.7s
186:	learn: 0.5199615	total: 2s	remaining: 8.68s
187:	learn: 0.5197141	total: 2.01s	remaining: 8.66s
188:	learn: 0.5194343	total: 2.01s	remaining: 8.64s
189:	learn: 0.5191782	total: 2.02s	remaining: 8.63s
190:	learn: 0.5188533	total: 2.03s	remaining: 8.61s
191:	learn: 0.5187009	total: 2.04s	remaining: 8.59s
192:	learn: 0.5183809	total: 2.05s	remaining: 8.58s
193:	learn: 0.5181

335:	learn: 0.4776450	total: 3.54s	remaining: 7s
336:	learn: 0.4774733	total: 3.55s	remaining: 6.99s
337:	learn: 0.4772325	total: 3.56s	remaining: 6.97s
338:	learn: 0.4769740	total: 3.57s	remaining: 6.96s
339:	learn: 0.4768130	total: 3.58s	remaining: 6.94s
340:	learn: 0.4764718	total: 3.59s	remaining: 6.93s
341:	learn: 0.4762987	total: 3.59s	remaining: 6.92s
342:	learn: 0.4760561	total: 3.6s	remaining: 6.9s
343:	learn: 0.4758092	total: 3.61s	remaining: 6.89s
344:	learn: 0.4755900	total: 3.62s	remaining: 6.87s
345:	learn: 0.4752882	total: 3.63s	remaining: 6.87s
346:	learn: 0.4751595	total: 3.64s	remaining: 6.85s
347:	learn: 0.4749799	total: 3.65s	remaining: 6.84s
348:	learn: 0.4747831	total: 3.66s	remaining: 6.83s
349:	learn: 0.4744775	total: 3.67s	remaining: 6.82s
350:	learn: 0.4742884	total: 3.68s	remaining: 6.8s
351:	learn: 0.4740857	total: 3.69s	remaining: 6.79s
352:	learn: 0.4737490	total: 3.7s	remaining: 6.78s
353:	learn: 0.4735072	total: 3.71s	remaining: 6.77s
354:	learn: 0.47326

504:	learn: 0.4338732	total: 5.22s	remaining: 5.12s
505:	learn: 0.4336727	total: 5.23s	remaining: 5.11s
506:	learn: 0.4334339	total: 5.24s	remaining: 5.1s
507:	learn: 0.4331365	total: 5.25s	remaining: 5.08s
508:	learn: 0.4329422	total: 5.26s	remaining: 5.08s
509:	learn: 0.4326453	total: 5.27s	remaining: 5.06s
510:	learn: 0.4324174	total: 5.28s	remaining: 5.05s
511:	learn: 0.4322324	total: 5.29s	remaining: 5.04s
512:	learn: 0.4319488	total: 5.3s	remaining: 5.03s
513:	learn: 0.4316682	total: 5.31s	remaining: 5.02s
514:	learn: 0.4314301	total: 5.32s	remaining: 5.01s
515:	learn: 0.4311480	total: 5.33s	remaining: 5s
516:	learn: 0.4309736	total: 5.34s	remaining: 4.99s
517:	learn: 0.4307858	total: 5.36s	remaining: 4.98s
518:	learn: 0.4304925	total: 5.37s	remaining: 4.97s
519:	learn: 0.4300506	total: 5.38s	remaining: 4.96s
520:	learn: 0.4297446	total: 5.39s	remaining: 4.95s
521:	learn: 0.4294629	total: 5.4s	remaining: 4.94s
522:	learn: 0.4291767	total: 5.41s	remaining: 4.93s
523:	learn: 0.4289

667:	learn: 0.3930624	total: 6.88s	remaining: 3.42s
668:	learn: 0.3927993	total: 6.89s	remaining: 3.41s
669:	learn: 0.3925635	total: 6.9s	remaining: 3.4s
670:	learn: 0.3923637	total: 6.91s	remaining: 3.39s
671:	learn: 0.3922076	total: 6.92s	remaining: 3.38s
672:	learn: 0.3919604	total: 6.93s	remaining: 3.37s
673:	learn: 0.3917499	total: 6.94s	remaining: 3.36s
674:	learn: 0.3915398	total: 6.95s	remaining: 3.35s
675:	learn: 0.3913186	total: 6.96s	remaining: 3.34s
676:	learn: 0.3910642	total: 6.97s	remaining: 3.33s
677:	learn: 0.3908462	total: 6.99s	remaining: 3.32s
678:	learn: 0.3906063	total: 6.99s	remaining: 3.31s
679:	learn: 0.3903200	total: 7s	remaining: 3.29s
680:	learn: 0.3901294	total: 7.01s	remaining: 3.28s
681:	learn: 0.3899108	total: 7.02s	remaining: 3.27s
682:	learn: 0.3897311	total: 7.03s	remaining: 3.26s
683:	learn: 0.3895417	total: 7.04s	remaining: 3.25s
684:	learn: 0.3893095	total: 7.05s	remaining: 3.24s
685:	learn: 0.3890457	total: 7.06s	remaining: 3.23s
686:	learn: 0.388

837:	learn: 0.3555305	total: 8.57s	remaining: 1.66s
838:	learn: 0.3553217	total: 8.58s	remaining: 1.65s
839:	learn: 0.3551595	total: 8.59s	remaining: 1.64s
840:	learn: 0.3549278	total: 8.6s	remaining: 1.63s
841:	learn: 0.3547306	total: 8.61s	remaining: 1.61s
842:	learn: 0.3545303	total: 8.62s	remaining: 1.6s
843:	learn: 0.3542611	total: 8.63s	remaining: 1.59s
844:	learn: 0.3541233	total: 8.64s	remaining: 1.58s
845:	learn: 0.3538904	total: 8.65s	remaining: 1.57s
846:	learn: 0.3537074	total: 8.66s	remaining: 1.56s
847:	learn: 0.3535367	total: 8.67s	remaining: 1.55s
848:	learn: 0.3533556	total: 8.68s	remaining: 1.54s
849:	learn: 0.3532023	total: 8.69s	remaining: 1.53s
850:	learn: 0.3530042	total: 8.71s	remaining: 1.52s
851:	learn: 0.3527884	total: 8.71s	remaining: 1.51s
852:	learn: 0.3526132	total: 8.72s	remaining: 1.5s
853:	learn: 0.3524489	total: 8.73s	remaining: 1.49s
854:	learn: 0.3522932	total: 8.74s	remaining: 1.48s
855:	learn: 0.3520930	total: 8.75s	remaining: 1.47s
856:	learn: 0.3

              precision    recall  f1-score   support

           0       0.67      0.17      0.27      1125
           1       0.36      0.85      0.51       628

    accuracy                           0.41      1753
   macro avg       0.52      0.51      0.39      1753
weighted avg       0.56      0.41      0.36      1753

Precision: 0.3641933287950987
Accuracy: 0.41414717626925274
AUC: 0.49985704175513096


In [34]:
params_CatBoost= {
                  'depth': [3,5,10],
                  'learning_rate' : [0.01,0.1,1],
                  'iterations' : [5,10,50,100]
                 }

gs_cat_cc= GridSearchCV(estimator= cat_cc, param_grid= params_CatBoost, cv=5, scoring='precision', n_jobs=-1, verbose=2)
gs_cat_cc.fit(X_cc_train, y_cc_train)

print("Best Parameters:", gs_cat_cc.best_params_)
print("Best Precision Score:", gs_cat_cc.best_score_)

tuned_cat_cc= gs_cat_cc.best_estimator_
y_pred_tuned_cat_cc= tuned_cat_cc.predict(X_test)
y_pred_prob_tuned_cat_cc= tuned_cat_cc.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_cat_cc, labels= tuned_cat_cc.classes_)
print(classification_report(y_test, y_pred_tuned_cat_cc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_cat_cc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_cat_cc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_cat_cc))

Fitting 5 folds for each of 36 candidates, totalling 180 fits
0:	learn: 0.6740527	total: 8.94ms	remaining: 885ms
1:	learn: 0.6618032	total: 19.4ms	remaining: 951ms
2:	learn: 0.6564550	total: 23.7ms	remaining: 767ms
3:	learn: 0.6539883	total: 30.5ms	remaining: 733ms
4:	learn: 0.6463821	total: 38.8ms	remaining: 736ms
5:	learn: 0.6418676	total: 45.7ms	remaining: 716ms
6:	learn: 0.6274207	total: 52.7ms	remaining: 700ms
7:	learn: 0.6245340	total: 58.1ms	remaining: 669ms
8:	learn: 0.6237180	total: 66.5ms	remaining: 673ms
9:	learn: 0.6230259	total: 76.1ms	remaining: 685ms
10:	learn: 0.6134571	total: 85.3ms	remaining: 690ms
11:	learn: 0.6122115	total: 94.3ms	remaining: 691ms
12:	learn: 0.6036467	total: 106ms	remaining: 710ms
13:	learn: 0.6031359	total: 115ms	remaining: 704ms
14:	learn: 0.6008481	total: 123ms	remaining: 699ms
15:	learn: 0.5966165	total: 133ms	remaining: 696ms
16:	learn: 0.5942988	total: 140ms	remaining: 683ms
17:	learn: 0.5924545	total: 151ms	remaining: 689ms
18:	learn: 0.59193

--- Visualisation ---

In [35]:
print('Logistic Regression Precision:', precision_score(y_test, y_pred_lr_cc, zero_division= 0))
print('Decision Tree Precision:', precision_score(y_test, y_pred_dt_cc, zero_division= 0))
print('Tuned Decision Tree Precision:', precision_score(y_test, y_pred_tuned_dt_cc, zero_division= 0))
print('KNeighborsClassifier Precision:', precision_score(y_test, y_pred_knn_cc, zero_division= 0))
print('Tuned KNeighborsClassifier Precision:', precision_score(y_test, y_pred_tuned_knn_cc, zero_division= 0))
print('GaussianNB Precision:', precision_score(y_test, y_pred_nb_cc, zero_division= 0))
print('SVM Precision:', precision_score(y_test, y_pred_svc_cc, zero_division= 0))
print('Random Forest Precision:', precision_score(y_test, y_pred_rf_cc, zero_division= 0))
print('Tuned Random Forest Precision:', precision_score(y_test, y_pred_tuned_rf_cc, zero_division= 0))
print('AdaBoost Precision:', precision_score(y_test, y_pred_ada_cc, zero_division= 0))
print('Tuned AdaBoost Precision:', precision_score(y_test, y_pred_tuned_ada_cc, zero_division= 0))
print('GradientBoosting Precision:', precision_score(y_test, y_pred_grb_cc, zero_division= 0))
print('Tuned GradientBoosting Precision:', precision_score(y_test, y_pred_tuned_grb_cc, zero_division= 0))
print('XGB Precision:', precision_score(y_test, y_pred_xgb_cc, zero_division= 0))
print('Tuned XGB Precision:', precision_score(y_test, y_pred_tuned_xgb_cc, zero_division= 0))
print('LGBM Precision:', precision_score(y_test, y_pred_lgm_cc, zero_division= 0))
print('Tuned LGBM Precision:', precision_score(y_test, y_pred_tuned_lgm_cc, zero_division= 0))
print('CatBoost Precision:', precision_score(y_test, y_pred_cat_cc, zero_division= 0))
print('Tuned CatBoost Precision:', precision_score(y_test, y_pred_tuned_cat_cc, zero_division= 0))

Logistic Regression Precision: 0.3580508474576271
Decision Tree Precision: 0.36019417475728155
Tuned Decision Tree Precision: 0.36618521665250636
KNeighborsClassifier Precision: 0.3722943722943723
Tuned KNeighborsClassifier Precision: 0.36443468715697036
GaussianNB Precision: 0.36569037656903763
SVM Precision: 0.36707663197729423
Random Forest Precision: 0.3667953667953668
Tuned Random Forest Precision: 0.3618827160493827
AdaBoost Precision: 0.36148148148148146
Tuned AdaBoost Precision: 0.3601449275362319
GradientBoosting Precision: 0.3599195710455764
Tuned GradientBoosting Precision: 0.35828135828135826
XGB Precision: 0.36613272311212813
Tuned XGB Precision: 0.3686054660126139
LGBM Precision: 0.36253561253561256
Tuned LGBM Precision: 0.36588921282798836
CatBoost Precision: 0.3641933287950987
Tuned CatBoost Precision: 0.36299615877080665


In [36]:
precision_scores= {
                    'Logistic Regression Precision:': precision_score(y_test, y_pred_lr_cc, zero_division= 0),
                    'Decision Tree Precision:': precision_score(y_test, y_pred_dt_cc, zero_division= 0),
                    'Tuned Decision Tree Precision:': precision_score(y_test, y_pred_tuned_dt_cc, zero_division= 0),
                    'KNeighborsClassifier Precision:': precision_score(y_test, y_pred_knn_cc, zero_division= 0),
                    'Tuned KNeighborsClassifier Precision:': precision_score(y_test, y_pred_tuned_knn_cc, zero_division= 0),
                    'GaussianNB Precision:': precision_score(y_test, y_pred_nb_cc, zero_division= 0),
                    'SVM Precision:': precision_score(y_test, y_pred_svc_cc, zero_division= 0),
                    'Random Forest Precision:': precision_score(y_test, y_pred_rf_cc, zero_division= 0),
                    'Tuned Random Forest Precision:': precision_score(y_test, y_pred_tuned_rf_cc, zero_division= 0),
                    'AdaBoost Precision:': precision_score(y_test, y_pred_ada_cc, zero_division= 0),
                    'Tuned AdaBoost Precision:': precision_score(y_test, y_pred_tuned_ada_cc, zero_division= 0),
                    'GradientBoosting Precision:': precision_score(y_test, y_pred_grb_cc, zero_division= 0),
                    'Tuned GradientBoosting Precision:': precision_score(y_test, y_pred_tuned_grb_cc, zero_division= 0),
                    'XGB Precision:': precision_score(y_test, y_pred_xgb_cc, zero_division= 0),
                    'Tuned XGB Precision:': precision_score(y_test, y_pred_tuned_xgb_cc, zero_division= 0),
                    'LGBM Precision:': precision_score(y_test, y_pred_lgm_cc, zero_division= 0),
                    'Tuned LGBM Precision:': precision_score(y_test, y_pred_tuned_lgm_cc, zero_division= 0),
                    'CatBoost Precision:': precision_score(y_test, y_pred_cat_cc, zero_division= 0),
                    'Tuned CatBoost Precision:': precision_score(y_test, y_pred_tuned_cat_cc, zero_division= 0)
                  }

cc_precision= pd.DataFrame(list(precision_scores.items()), columns= ['Model', 'Precision Score'])
cc_precision= cc_precision.sort_values(by= 'Precision Score', ascending=False)
print(cc_precision)

                                    Model  Precision Score
3         KNeighborsClassifier Precision:         0.372294
14                   Tuned XGB Precision:         0.368605
6                          SVM Precision:         0.367077
7                Random Forest Precision:         0.366795
2          Tuned Decision Tree Precision:         0.366185
13                         XGB Precision:         0.366133
16                  Tuned LGBM Precision:         0.365889
5                   GaussianNB Precision:         0.365690
4   Tuned KNeighborsClassifier Precision:         0.364435
17                    CatBoost Precision:         0.364193
18              Tuned CatBoost Precision:         0.362996
15                        LGBM Precision:         0.362536
8          Tuned Random Forest Precision:         0.361883
9                     AdaBoost Precision:         0.361481
1                Decision Tree Precision:         0.360194
10              Tuned AdaBoost Precision:         0.3601

In [37]:
accuracy_scores= {
                    'Logistic Regression Accuracy:': accuracy_score(y_test, y_pred_lr_cc),
                    'Decision Tree Accuracy:': accuracy_score(y_test, y_pred_dt_cc),
                    'Tuned Decision Tree Accuracy:': accuracy_score(y_test, y_pred_tuned_dt_cc),
                    'KNeighborsClassifier Accuracy:': accuracy_score(y_test, y_pred_knn_cc),
                    'Tuned KNeighborsClassifier Accuracy:': accuracy_score(y_test, y_pred_tuned_knn_cc),
                    'GaussianNB Accuracy:': accuracy_score(y_test, y_pred_nb_cc),
                    'SVM Accuracy:': accuracy_score(y_test, y_pred_svc_cc),
                    'Random Forest Accuracy:': accuracy_score(y_test, y_pred_rf_cc),
                    'Tuned Random Forest Accuracy:': accuracy_score(y_test, y_pred_tuned_rf_cc),
                    'AdaBoost Accuracy:': accuracy_score(y_test, y_pred_ada_cc),
                    'Tuned AdaBoost Accuracy:': accuracy_score(y_test, y_pred_tuned_ada_cc),
                    'GradientBoosting Accuracy:': accuracy_score(y_test, y_pred_grb_cc),
                    'Tuned GradientBoosting Accuracy:': accuracy_score(y_test, y_pred_tuned_grb_cc),
                    'XGB Accuracy:': accuracy_score(y_test, y_pred_xgb_cc),
                    'Tuned XGB Accuracy:': accuracy_score(y_test, y_pred_tuned_xgb_cc),
                    'LGBM Accuracy:': accuracy_score(y_test, y_pred_lgm_cc),
                    'Tuned LGBM Accuracy:': accuracy_score(y_test, y_pred_tuned_lgm_cc),
                    'CatBoost Accuracy:': accuracy_score(y_test, y_pred_cat_cc),
                    'Tuned CatBoost Accuracy:': accuracy_score(y_test, y_pred_tuned_cat_cc)
                  }

cc_accuracy= pd.DataFrame(list(accuracy_scores.items()), columns= ['Model', 'Accuracy Score'])
cc_accuracy= cc_accuracy.sort_values(by= 'Accuracy Score', ascending=False)
print(cc_accuracy)

                                   Model  Accuracy Score
3         KNeighborsClassifier Accuracy:        0.540787
4   Tuned KNeighborsClassifier Accuracy:        0.500856
0          Logistic Regression Accuracy:        0.488876
6                          SVM Accuracy:        0.481460
1                Decision Tree Accuracy:        0.477467
2          Tuned Decision Tree Accuracy:        0.462065
5                   GaussianNB Accuracy:        0.458642
7                Random Forest Accuracy:        0.444952
13                         XGB Accuracy:        0.441529
8          Tuned Random Forest Accuracy:        0.437536
16                  Tuned LGBM Accuracy:        0.431831
9                     AdaBoost Accuracy:        0.428408
14                   Tuned XGB Accuracy:        0.427838
15                        LGBM Accuracy:        0.421563
10              Tuned AdaBoost Accuracy:        0.421563
17                    CatBoost Accuracy:        0.414147
12      Tuned GradientBoosting 

In [38]:
auc_scores= {
                    'Logistic Regression AUC:': roc_auc_score(y_test, y_pred_prob_lr_cc),
                    'Decision Tree AUC:': roc_auc_score(y_test, y_pred_prob_dt_cc),
                    'Tuned Decision Tree AUC:': roc_auc_score(y_test, y_pred_prob_tuned_dt_cc),
                    'KNeighborsClassifier AUC:': roc_auc_score(y_test, y_pred_prob_knn_cc),
                    'Tuned KNeighborsClassifier AUC:': roc_auc_score(y_test, y_pred_prob_tuned_knn_cc),
                    'GaussianNB AUC:': roc_auc_score(y_test, y_pred_prob_nb_cc),
                    'SVM AUC:': roc_auc_score(y_test, y_pred_prob_svc_cc),
                    'Random Forest AUC:': roc_auc_score(y_test, y_pred_prob_rf_cc),
                    'Tuned Random Forest AUC:': roc_auc_score(y_test, y_pred_prob_tuned_rf_cc),
                    'AdaBoost AUC:': roc_auc_score(y_test, y_pred_prob_ada_cc),
                    'Tuned AdaBoost AUC:': roc_auc_score(y_test, y_pred_prob_tuned_ada_cc),
                    'GradientBoosting AUC:': roc_auc_score(y_test, y_pred_prob_grb_cc),
                    'Tuned GradientBoosting AUC:': roc_auc_score(y_test, y_pred_prob_tuned_grb_cc),
                    'XGB AUC:': roc_auc_score(y_test, y_pred_prob_xgb_cc),
                    'Tuned XGB AUC:': roc_auc_score(y_test, y_pred_prob_tuned_xgb_cc),
                    'LGBM AUC:': roc_auc_score(y_test, y_pred_prob_lgm_cc),
                    'Tuned LGBM AUC:': roc_auc_score(y_test, y_pred_prob_tuned_lgm_cc),
                    'CatBoost AUC:': roc_auc_score(y_test, y_pred_prob_cat_cc),
                    'Tuned CatBoost AUC:': roc_auc_score(y_test, y_pred_prob_tuned_cat_cc)
                  }

cc_auc= pd.DataFrame(list(auc_scores.items()), columns= ['Model', 'AUC Score'])
cc_auc= cc_auc.sort_values(by= 'AUC Score', ascending=False)
print(cc_auc)

                              Model  AUC Score
8          Tuned Random Forest AUC:   0.523292
9                     AdaBoost AUC:   0.523037
7                Random Forest AUC:   0.522476
6                          SVM AUC:   0.517958
15                        LGBM AUC:   0.512917
3         KNeighborsClassifier AUC:   0.507748
18              Tuned CatBoost AUC:   0.506497
11            GradientBoosting AUC:   0.505485
13                         XGB AUC:   0.505434
14                   Tuned XGB AUC:   0.505159
12      Tuned GradientBoosting AUC:   0.504263
16                  Tuned LGBM AUC:   0.503475
1                Decision Tree AUC:   0.502493
5                   GaussianNB AUC:   0.501302
17                    CatBoost AUC:   0.499857
4   Tuned KNeighborsClassifier AUC:   0.499069
2          Tuned Decision Tree AUC:   0.497050
0          Logistic Regression AUC:   0.495212
10              Tuned AdaBoost AUC:   0.490517
