In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

df= pd.read_csv('C://Users//User//Desktop//MSc Westminster//Dissertation//DataSets//Heart_Attack_Prediction.csv')

In [2]:
columns=df.columns

for column in columns:
    if df[column].dtype=="int32":
        df[column]=df[column].astype("int16")
    elif df[column].dtype=="float64":
        df[column]=df[column].astype("float16")
    elif df[column].dtype=="object":
        df[column]=df[column].astype("category")

In [3]:
df['Sex']= df['Sex'].map({'Female': 0, 'Male': 1})
df['Sex']= pd.to_numeric(df['Sex'])

df['Diet']= df['Diet'].map({'Healthy': 0, 'Average': 1, 'Unhealthy':2})
df['Diet']= pd.to_numeric(df['Diet'])

df[['HBP', 'LBP']]= df['Blood Pressure'].str.split('/', expand= True)
df['HBP']= pd.to_numeric(df['HBP'])
df['LBP']= pd.to_numeric(df['LBP'])

df['Diabetes'] = df['Diabetes'].map({0: 1, 1: 0})

df['Exercise Hours Per Week']= round(df['Exercise Hours Per Week'], 0)

df['Sedentary Hours Per Day']= round(df['Sedentary Hours Per Day'], 0)

df['Income']= round(df['Income'], 0)

df['BMI']= round(df['BMI'], 0)

df = df.drop(columns=['Patient ID', 'Blood Pressure', 'Country', 'Continent', 'Hemisphere'])

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8763 entries, 0 to 8762
Data columns (total 23 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Age                              8763 non-null   int64  
 1   Sex                              8763 non-null   int64  
 2   Cholesterol                      8763 non-null   int64  
 3   Heart Rate                       8763 non-null   int64  
 4   Diabetes                         8763 non-null   int64  
 5   Family History                   8763 non-null   int64  
 6   Smoking                          8763 non-null   int64  
 7   Obesity                          8763 non-null   int64  
 8   Alcohol Consumption              8763 non-null   int64  
 9   Exercise Hours Per Week          8763 non-null   float16
 10  Diet                             8763 non-null   int64  
 11  Previous Heart Problems          8763 non-null   int64  
 12  Medication Use      

In [5]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age,8763.0,53.70798,21.249509,18.0,35.0,54.0,72.0,90.0
Sex,8763.0,0.6973639,0.459425,0.0,0.0,1.0,1.0,1.0
Cholesterol,8763.0,259.8772,80.863276,120.0,192.0,259.0,330.0,400.0
Heart Rate,8763.0,75.02168,20.550948,40.0,57.0,75.0,93.0,110.0
Diabetes,8763.0,0.347712,0.476271,0.0,0.0,0.0,1.0,1.0
Family History,8763.0,0.4929819,0.499979,0.0,0.0,0.0,1.0,1.0
Smoking,8763.0,0.896839,0.304186,0.0,1.0,1.0,1.0,1.0
Obesity,8763.0,0.5014265,0.500026,0.0,0.0,1.0,1.0,1.0
Alcohol Consumption,8763.0,0.5980828,0.490313,0.0,0.0,1.0,1.0,1.0
Exercise Hours Per Week,8763.0,inf,5.796875,0.0,5.0,10.0,15.0,20.0


In [6]:
X= df.drop(['Heart Attack Risk'], axis= 1)
y= df['Heart Attack Risk']

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X, y, test_size= 0.2, random_state= 15, stratify= y)

from sklearn.preprocessing import StandardScaler
ss= StandardScaler()
X_train= ss.fit_transform(X_train)
X_test= ss.transform(X_test)

In [8]:
from imblearn.under_sampling import ClusterCentroids
from imblearn.over_sampling import ADASYN
from sklearn.cluster import KMeans

cc= ClusterCentroids(random_state= 15, estimator= KMeans(n_init= 10))
ada= ADASYN(random_state= 15)

X_cc, y_cc= cc.fit_resample(X_train, y_train)
X_cc_ada, y_cc_ada= ada.fit_resample(X_cc, y_cc)

--- LogisticRegression ---

In [9]:
from sklearn.metrics import confusion_matrix, classification_report, precision_score, roc_auc_score, accuracy_score
from sklearn.linear_model import LogisticRegression
lr_cc_ada= LogisticRegression(random_state=15)
lr_cc_ada.fit(X_cc_ada, y_cc_ada)
y_pred_lr_cc_ada= lr_cc_ada.predict(X_test)
y_pred_prob_lr_cc_ada= lr_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_lr_cc_ada, labels= lr_cc_ada.classes_)
print(classification_report(y_test, y_pred_lr_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_lr_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_lr_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_lr_cc_ada))

              precision    recall  f1-score   support

           0       0.64      0.45      0.53      1125
           1       0.35      0.54      0.43       628

    accuracy                           0.48      1753
   macro avg       0.49      0.49      0.48      1753
weighted avg       0.53      0.48      0.49      1753

Precision: 0.35257082896117525
Accuracy: 0.4814603536794067
AUC: 0.4932937013446568


--- DecisionTreeClassifier ---

In [10]:
from sklearn.tree import DecisionTreeClassifier
dt_cc_ada= DecisionTreeClassifier(random_state=15)
dt_cc_ada.fit(X_cc_ada, y_cc_ada)
y_pred_dt_cc_ada= dt_cc_ada.predict(X_test)
y_pred_prob_dt_cc_ada= dt_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_dt_cc_ada, labels= dt_cc_ada.classes_)
print(classification_report(y_test, y_pred_dt_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_dt_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_dt_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_dt_cc_ada))

              precision    recall  f1-score   support

           0       0.65      0.45      0.53      1125
           1       0.36      0.56      0.44       628

    accuracy                           0.49      1753
   macro avg       0.50      0.50      0.48      1753
weighted avg       0.54      0.49      0.50      1753

Precision: 0.3612836438923395
Accuracy: 0.48887621220764405
AUC: 0.5036440198159944


--- Tuned - DecisionTreeClassifier ---

In [11]:
from sklearn.model_selection import GridSearchCV
param_grid= {
             'max_depth': [None, 10, 20, 30, 40, 50],
             'min_samples_split': [2, 10, 20],
             'min_samples_leaf': [1, 5, 10],
             'max_features': [None, 'sqrt', 'log2'],
             'criterion': ['gini', 'entropy']
            }

gs_dt_cc_ada= GridSearchCV(estimator= dt_cc_ada, param_grid= param_grid, cv= 5, scoring= 'precision')
gs_dt_cc_ada.fit(X_cc_ada, y_cc_ada)

print("Best Parameters:", gs_dt_cc_ada.best_params_)
print("Best Precision Score:", gs_dt_cc_ada.best_score_)

Best Parameters: {'criterion': 'entropy', 'max_depth': 10, 'max_features': None, 'min_samples_leaf': 5, 'min_samples_split': 2}
Best Precision Score: 0.5827962298635625


In [12]:
tuned_dt_cc_ada= gs_dt_cc_ada.best_estimator_
y_pred_tuned_dt_cc_ada= tuned_dt_cc_ada.predict(X_test)
y_pred_prob_tuned_dt_cc_ada= tuned_dt_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_dt_cc_ada, labels= tuned_dt_cc_ada.classes_)
print(classification_report(y_test, y_pred_tuned_dt_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_dt_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_dt_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_dt_cc_ada))

              precision    recall  f1-score   support

           0       0.65      0.45      0.53      1125
           1       0.36      0.57      0.44       628

    accuracy                           0.49      1753
   macro avg       0.51      0.51      0.49      1753
weighted avg       0.55      0.49      0.50      1753

Precision: 0.36493374108053006
Accuracy: 0.4905875641756988
AUC: 0.5006999292285916


--- KNeighborsClassifier ---

In [13]:
from sklearn.neighbors import KNeighborsClassifier
knn_cc_ada= KNeighborsClassifier()
knn_cc_ada.fit(X_cc_ada, y_cc_ada)
y_pred_knn_cc_ada= knn_cc_ada.predict(X_test)
y_pred_prob_knn_cc_ada= knn_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_knn_cc_ada, labels= knn_cc_ada.classes_)
print(classification_report(y_test, y_pred_knn_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_knn_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_knn_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_knn_cc_ada))

              precision    recall  f1-score   support

           0       0.64      0.60      0.62      1125
           1       0.36      0.40      0.38       628

    accuracy                           0.53      1753
   macro avg       0.50      0.50      0.50      1753
weighted avg       0.54      0.53      0.53      1753

Precision: 0.3590844062947067
Accuracy: 0.5293782087849401
AUC: 0.5053489030431706


--- Tuned - KNeighborsClassifier ---

In [14]:
param_grid= {
             'n_neighbors': np.arange(1,40),
             'weights': ['uniform', 'distance'],
             'metric': ['euclidean', 'manhattan', 'minkowski']
            }

gs_knn_cc_ada= GridSearchCV(estimator= knn_cc_ada, param_grid= param_grid, cv=5, scoring= 'precision')
gs_knn_cc_ada.fit(X_cc_ada, y_cc_ada)
print("Best Parameters:", gs_knn_cc_ada.best_params_)
print("Best Precision Score:", gs_knn_cc_ada.best_score_)

Best Parameters: {'metric': 'manhattan', 'n_neighbors': 37, 'weights': 'uniform'}
Best Precision Score: 0.5329098138827095


In [15]:
tuned_knn_cc_ada= gs_knn_cc_ada.best_estimator_
y_pred_tuned_knn_cc_ada= tuned_knn_cc_ada.predict(X_test)
y_pred_prob_tuned_knn_cc_ada= tuned_knn_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_knn_cc_ada, labels= tuned_knn_cc_ada.classes_)
print(classification_report(y_test, y_pred_tuned_knn_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_knn_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_knn_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_knn_cc_ada))

              precision    recall  f1-score   support

           0       0.65      0.48      0.55      1125
           1       0.37      0.54      0.44       628

    accuracy                           0.50      1753
   macro avg       0.51      0.51      0.49      1753
weighted avg       0.55      0.50      0.51      1753

Precision: 0.36706135629709363
Accuracy: 0.5008556759840274
AUC: 0.5003489030431705


--- GaussianNB ---

In [16]:
from sklearn.naive_bayes import GaussianNB
nb_cc_ada= GaussianNB()
nb_cc_ada.fit(X_cc_ada, y_cc_ada)
y_pred_nb_cc_ada= nb_cc_ada.predict(X_test)
y_pred_prob_nb_cc_ada= nb_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_nb_cc_ada, labels= nb_cc_ada.classes_)
print(classification_report(y_test, y_pred_nb_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_nb_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_nb_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_nb_cc_ada))

              precision    recall  f1-score   support

           0       0.65      0.31      0.42      1125
           1       0.36      0.70      0.48       628

    accuracy                           0.45      1753
   macro avg       0.51      0.51      0.45      1753
weighted avg       0.55      0.45      0.44      1753

Precision: 0.3624382207578254
Accuracy: 0.45122646891043927
AUC: 0.5048874734607218


--- SVM ---

In [17]:
from sklearn.svm import SVC
svc_cc_ada= SVC(kernel= 'rbf',probability= True, gamma= 1, random_state=15)
svc_cc_ada.fit(X_cc_ada, y_cc_ada)
y_pred_svc_cc_ada= svc_cc_ada.predict(X_test)
y_pred_prob_svc_cc_ada= svc_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_svc_cc_ada, labels= svc_cc_ada.classes_)
print(classification_report(y_test, y_pred_svc_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_svc_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_svc_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_svc_cc_ada))

              precision    recall  f1-score   support

           0       0.64      0.89      0.75      1125
           1       0.38      0.12      0.18       628

    accuracy                           0.61      1753
   macro avg       0.51      0.50      0.46      1753
weighted avg       0.55      0.61      0.54      1753

Precision: 0.3756345177664975
Accuracy: 0.6138049058756417
AUC: 0.4993871196036802


--- Random Forest ---

In [18]:
from sklearn.ensemble import RandomForestClassifier
rf_cc_ada= RandomForestClassifier(random_state=15)
rf_cc_ada.fit(X_cc_ada, y_cc_ada)
y_pred_rf_cc_ada= rf_cc_ada.predict(X_test)
y_pred_prob_rf_cc_ada= rf_cc_ada.predict_proba(X_test)[:,1]

In [19]:
cm= confusion_matrix(y_test, y_pred_rf_cc_ada, labels= rf_cc_ada.classes_)
print(classification_report(y_test, y_pred_rf_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_rf_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_rf_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_rf_cc_ada))

              precision    recall  f1-score   support

           0       0.69      0.28      0.40      1125
           1       0.38      0.78      0.51       628

    accuracy                           0.46      1753
   macro avg       0.54      0.53      0.45      1753
weighted avg       0.58      0.46      0.44      1753

Precision: 0.3765432098765432
Accuracy: 0.4592127780946948
AUC: 0.5209341825902335


In [20]:
param_grid= {
             'n_estimators': [100, 200, 300],
             'max_depth': [None, 10, 20, 30],
             'min_samples_split': [2, 5, 10],
             'min_samples_leaf': [1, 2, 4],
            }

gs_tuned_rf_cc_ada= GridSearchCV(estimator= rf_cc_ada, param_grid= param_grid, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_tuned_rf_cc_ada.fit(X_cc_ada, y_cc_ada)
print("Best Parameters:", gs_tuned_rf_cc_ada.best_params_)
print("Best Precision Score:", gs_tuned_rf_cc_ada.best_score_)

Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 300}
Best Precision Score: 0.6334785090640077


In [21]:
tuned_rf_cc_ada= gs_tuned_rf_cc_ada.best_estimator_
y_pred_tuned_rf_cc_ada= tuned_rf_cc_ada.predict(X_test)
y_pred_prob_tuned_rf_cc_ada= tuned_rf_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_rf_cc_ada, labels= tuned_rf_cc_ada.classes_)
print(classification_report(y_test, y_pred_tuned_rf_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_rf_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_rf_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_rf_cc_ada))

              precision    recall  f1-score   support

           0       0.67      0.30      0.41      1125
           1       0.37      0.73      0.49       628

    accuracy                           0.45      1753
   macro avg       0.52      0.52      0.45      1753
weighted avg       0.56      0.45      0.44      1753

Precision: 0.36850519584332536
Accuracy: 0.4540787221905305
AUC: 0.520544939844303


--- AdaBoost ---

In [24]:
from sklearn.ensemble import AdaBoostClassifier
ada_cc_ada= AdaBoostClassifier(random_state=15)
ada_cc_ada.fit(X_cc_ada, y_cc_ada)
y_pred_ada_cc_ada= ada_cc_ada.predict(X_test)
y_pred_prob_ada_cc_ada= ada_cc_ada.predict_proba(X_test)[:,1]

In [25]:
cm= confusion_matrix(y_test, y_pred_ada_cc_ada, labels= ada_cc_ada.classes_)
print(classification_report(y_test, y_pred_ada_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_ada_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_ada_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_ada_cc_ada))

              precision    recall  f1-score   support

           0       0.66      0.30      0.41      1125
           1       0.37      0.72      0.49       628

    accuracy                           0.45      1753
   macro avg       0.51      0.51      0.45      1753
weighted avg       0.56      0.45      0.44      1753

Precision: 0.36642453591606133
Accuracy: 0.452937820878494
AUC: 0.501862703467799


In [44]:
param_grid= {'n_estimators': [50, 100, 200]}

gs_ada_cc_ada= GridSearchCV(estimator= ada_cc_ada, param_grid= param_grid, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_ada_cc_ada.fit(X_cc_ada, y_cc_ada)
print("Best Parameters:", gs_ada_cc_ada.best_params_)
print("Best Precision Score:", gs_ada_cc_ada.best_score_)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best Parameters: {'n_estimators': 200}
Best Precision Score: 0.6380368700315293


In [45]:
tuned_ada_cc_ada= gs_ada_cc_ada.best_estimator_
y_pred_tuned_ada_cc_ada= tuned_ada_cc_ada.predict(X_test)
y_pred_prob_tuned_ada_cc_ada= tuned_ada_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_ada_cc_ada, labels= tuned_ada_cc_ada.classes_)
print(classification_report(y_test, y_pred_tuned_ada_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_ada_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_ada_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_ada_cc_ada))

              precision    recall  f1-score   support

           0       0.66      0.25      0.36      1125
           1       0.37      0.77      0.50       628

    accuracy                           0.44      1753
   macro avg       0.51      0.51      0.43      1753
weighted avg       0.56      0.44      0.41      1753

Precision: 0.3654860587792012
Accuracy: 0.4381061038220194
AUC: 0.5016546355272471


--- GradientBoosting ---

In [46]:
from sklearn.ensemble import GradientBoostingClassifier
grb_cc_ada= GradientBoostingClassifier(random_state=15)
grb_cc_ada.fit(X_cc_ada, y_cc_ada)
y_pred_grb_cc_ada= grb_cc_ada.predict(X_test)
y_pred_prob_grb_cc_ada= grb_cc_ada.predict_proba(X_test)[:,1]


cm= confusion_matrix(y_test, y_pred_grb_cc_ada, labels= grb_cc_ada.classes_)
print(classification_report(y_test, y_pred_grb_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_grb_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_grb_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_grb_cc_ada))

              precision    recall  f1-score   support

           0       0.66      0.18      0.29      1125
           1       0.36      0.83      0.51       628

    accuracy                           0.42      1753
   macro avg       0.51      0.51      0.40      1753
weighted avg       0.55      0.42      0.36      1753

Precision: 0.3624393624393624
Accuracy: 0.4152880775812892
AUC: 0.49532625619249826


In [47]:
param_grid= {'learning_rate': [0.01, 0.1, 0.2]}

gs_grb_cc_ada= GridSearchCV(estimator= grb_cc_ada, param_grid= param_grid, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_grb_cc_ada.fit(X_cc_ada, y_cc_ada)

print("Best Parameters:", gs_grb_cc_ada.best_params_)
print("Best Precision Score:", gs_grb_cc_ada.best_score_)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best Parameters: {'learning_rate': 0.2}
Best Precision Score: 0.665340964737106


In [48]:
tuned_grb_cc_ada= gs_grb_cc_ada.best_estimator_
y_pred_tuned_grb_cc_ada= tuned_grb_cc_ada.predict(X_test)
y_pred_prob_tuned_grb_cc_ada= tuned_grb_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_grb_cc_ada, labels= tuned_grb_cc_ada.classes_)
print(classification_report(y_test, y_pred_tuned_grb_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_grb_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_grb_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_grb_cc_ada))

              precision    recall  f1-score   support

           0       0.67      0.19      0.30      1125
           1       0.36      0.83      0.51       628

    accuracy                           0.42      1753
   macro avg       0.52      0.51      0.40      1753
weighted avg       0.56      0.42      0.37      1753

Precision: 0.36477987421383645
Accuracy: 0.4209925841414718
AUC: 0.5093205944798301


--- XGB ---

In [49]:
from xgboost import XGBClassifier
xgb_cc_ada= XGBClassifier(random_state=15)
xgb_cc_ada.fit(X_cc_ada, y_cc_ada)
y_pred_xgb_cc_ada= xgb_cc_ada.predict(X_test)
y_pred_prob_xgb_cc_ada= xgb_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_xgb_cc_ada, labels= xgb_cc_ada.classes_)
print(classification_report(y_test, y_pred_xgb_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_xgb_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_xgb_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_xgb_cc_ada))

              precision    recall  f1-score   support

           0       0.65      0.25      0.36      1125
           1       0.36      0.76      0.49       628

    accuracy                           0.43      1753
   macro avg       0.50      0.50      0.42      1753
weighted avg       0.55      0.43      0.41      1753

Precision: 0.3603945371775417
Accuracy: 0.4318311466058186
AUC: 0.5033234253361643


In [50]:
params_XGBoost= {'learning_rate': [0.01, 0.1, 1.0]}

gs_xgb_cc_ada= GridSearchCV(estimator= xgb_cc_ada, param_grid= params_XGBoost, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_xgb_cc_ada.fit(X_cc_ada, y_cc_ada)

print("Best Parameters:", gs_xgb_cc_ada.best_params_)
print("Best Precision Score:", gs_xgb_cc_ada.best_score_)

tuned_xgb_cc_ada= gs_xgb_cc_ada.best_estimator_
y_pred_tuned_xgb_cc_ada= tuned_xgb_cc_ada.predict(X_test)
y_pred_prob_tuned_xgb_cc_ada= tuned_xgb_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_xgb_cc_ada, labels= tuned_xgb_cc_ada.classes_)
print(classification_report(y_test, y_pred_tuned_xgb_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_xgb_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_xgb_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_xgb_cc_ada))

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best Parameters: {'learning_rate': 0.1}
Best Precision Score: 0.6590167069345183
              precision    recall  f1-score   support

           0       0.68      0.22      0.34      1125
           1       0.37      0.81      0.51       628

    accuracy                           0.43      1753
   macro avg       0.52      0.52      0.42      1753
weighted avg       0.57      0.43      0.40      1753

Precision: 0.3680404916847433
Accuracy: 0.43354249857387334
AUC: 0.497238499646143


--- LGBM ---

In [51]:
from lightgbm import LGBMClassifier
lgm_cc_ada= LGBMClassifier(random_state=15)
lgm_cc_ada.fit(X_cc_ada, y_cc_ada)
y_pred_lgm_cc_ada= lgm_cc_ada.predict(X_test)
y_pred_prob_lgm_cc_ada= lgm_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_lgm_cc_ada, labels= lgm_cc_ada.classes_)
print(classification_report(y_test, y_pred_lgm_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_lgm_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_lgm_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_lgm_cc_ada))

[LightGBM] [Info] Number of positive: 2511, number of negative: 2511
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000698 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2298
[LightGBM] [Info] Number of data points in the train set: 5022, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
              precision    recall  f1-score   support

           0       0.68      0.23      0.34      1125
           1       0.37      0.80      0.51       628

    accuracy                           0.44      1753
   macro avg       0.52      0.52      0.42      1753
weighted avg       0.57      0.44      0.40      1753

Precision: 0.36834427425237054
Accuracy: 0.43582430119794635
AUC: 0.5091464968152867


In [52]:
params_LGB= {'learning_rate': [0.001, 0.01, 0.1, 1.0],
             'num_leaves': [31, 127],
             'reg_alpha': [0.1, 0.5],
             'min_data_in_leaf': [30, 50, 100, 300, 400]}

gs_lgm_cc_ada= GridSearchCV(estimator= lgm_cc_ada, param_grid= params_LGB, cv=5, scoring='precision', n_jobs=-1, verbose=2)
gs_lgm_cc_ada.fit(X_cc_ada, y_cc_ada)

print("Best Parameters:", gs_lgm_cc_ada.best_params_)
print("Best Precision Score:", gs_lgm_cc_ada.best_score_)

tuned_lgm_cc_ada= gs_lgm_cc_ada.best_estimator_
y_pred_tuned_lgm_cc_ada= tuned_lgm_cc_ada.predict(X_test)
y_pred_prob_tuned_lgm_cc_ada= tuned_lgm_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_lgm_cc_ada, labels= tuned_lgm_cc_ada.classes_)
print(classification_report(y_test, y_pred_tuned_lgm_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_lgm_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_lgm_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_lgm_cc_ada))

Fitting 5 folds for each of 80 candidates, totalling 400 fits
[LightGBM] [Info] Number of positive: 2511, number of negative: 2511
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000743 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2298
[LightGBM] [Info] Number of data points in the train set: 5022, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Best Parameters: {'learning_rate': 0.1, 'min_data_in_leaf': 30, 'num_leaves': 31, 'reg_alpha': 0.1}
Best Precision Score: 0.6621829879230062
              precision    recall  f1-score   support

           0       0.66      0.22      0.34      1125
           1       0.36      0.79      0.50       628

    accuracy                           0.43      1753
   macro avg       0.51      0.51      0.42      1753
weighted avg       0.55      0.43      0.39      1753

Precision: 0.36303871439006574
Acc

--- CatBoost ---

In [53]:
from catboost import CatBoostClassifier
cat_cc_ada= CatBoostClassifier(random_state=15)
cat_cc_ada.fit(X_cc_ada, y_cc_ada)
y_pred_cat_cc_ada= cat_cc_ada.predict(X_test)
y_pred_prob_cat_cc_ada= cat_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_cat_cc_ada, labels= cat_cc_ada.classes_)
print(classification_report(y_test, y_pred_cat_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_cat_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_cat_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_cat_cc_ada))

Learning rate set to 0.020521
0:	learn: 0.6872793	total: 4.84ms	remaining: 4.84s
1:	learn: 0.6865850	total: 9.59ms	remaining: 4.78s
2:	learn: 0.6855234	total: 14ms	remaining: 4.65s
3:	learn: 0.6799286	total: 19.3ms	remaining: 4.8s
4:	learn: 0.6789263	total: 24.5ms	remaining: 4.87s
5:	learn: 0.6778909	total: 29.6ms	remaining: 4.9s
6:	learn: 0.6755403	total: 34.2ms	remaining: 4.84s
7:	learn: 0.6747085	total: 38.6ms	remaining: 4.79s
8:	learn: 0.6727113	total: 43.2ms	remaining: 4.76s
9:	learn: 0.6717234	total: 47.5ms	remaining: 4.7s
10:	learn: 0.6711752	total: 51.5ms	remaining: 4.63s
11:	learn: 0.6700901	total: 55.5ms	remaining: 4.57s
12:	learn: 0.6689789	total: 59.5ms	remaining: 4.52s
13:	learn: 0.6669770	total: 63.3ms	remaining: 4.46s
14:	learn: 0.6639284	total: 67.2ms	remaining: 4.42s
15:	learn: 0.6592085	total: 71.1ms	remaining: 4.37s
16:	learn: 0.6587449	total: 74.9ms	remaining: 4.33s
17:	learn: 0.6579856	total: 78.8ms	remaining: 4.3s
18:	learn: 0.6555849	total: 82.6ms	remaining: 4.27

171:	learn: 0.5518170	total: 710ms	remaining: 3.42s
172:	learn: 0.5515330	total: 714ms	remaining: 3.41s
173:	learn: 0.5512167	total: 718ms	remaining: 3.41s
174:	learn: 0.5509637	total: 721ms	remaining: 3.4s
175:	learn: 0.5506845	total: 726ms	remaining: 3.4s
176:	learn: 0.5504194	total: 730ms	remaining: 3.39s
177:	learn: 0.5500944	total: 733ms	remaining: 3.39s
178:	learn: 0.5494386	total: 737ms	remaining: 3.38s
179:	learn: 0.5489883	total: 742ms	remaining: 3.38s
180:	learn: 0.5487082	total: 748ms	remaining: 3.38s
181:	learn: 0.5481804	total: 754ms	remaining: 3.39s
182:	learn: 0.5478980	total: 757ms	remaining: 3.38s
183:	learn: 0.5474684	total: 761ms	remaining: 3.38s
184:	learn: 0.5468528	total: 766ms	remaining: 3.37s
185:	learn: 0.5465594	total: 770ms	remaining: 3.37s
186:	learn: 0.5461786	total: 772ms	remaining: 3.36s
187:	learn: 0.5459483	total: 776ms	remaining: 3.35s
188:	learn: 0.5451507	total: 780ms	remaining: 3.35s
189:	learn: 0.5446178	total: 784ms	remaining: 3.34s
190:	learn: 0.

338:	learn: 0.4999833	total: 1.42s	remaining: 2.77s
339:	learn: 0.4997372	total: 1.42s	remaining: 2.76s
340:	learn: 0.4995686	total: 1.43s	remaining: 2.76s
341:	learn: 0.4993203	total: 1.43s	remaining: 2.75s
342:	learn: 0.4989365	total: 1.44s	remaining: 2.75s
343:	learn: 0.4986964	total: 1.44s	remaining: 2.75s
344:	learn: 0.4985176	total: 1.44s	remaining: 2.74s
345:	learn: 0.4982779	total: 1.45s	remaining: 2.74s
346:	learn: 0.4979520	total: 1.45s	remaining: 2.73s
347:	learn: 0.4976080	total: 1.46s	remaining: 2.73s
348:	learn: 0.4973632	total: 1.46s	remaining: 2.72s
349:	learn: 0.4971158	total: 1.46s	remaining: 2.72s
350:	learn: 0.4967688	total: 1.47s	remaining: 2.71s
351:	learn: 0.4964491	total: 1.47s	remaining: 2.71s
352:	learn: 0.4961347	total: 1.48s	remaining: 2.71s
353:	learn: 0.4959256	total: 1.48s	remaining: 2.71s
354:	learn: 0.4955316	total: 1.49s	remaining: 2.7s
355:	learn: 0.4952131	total: 1.49s	remaining: 2.7s
356:	learn: 0.4949897	total: 1.5s	remaining: 2.7s
357:	learn: 0.49

531:	learn: 0.4484850	total: 2.3s	remaining: 2.03s
532:	learn: 0.4482179	total: 2.31s	remaining: 2.02s
533:	learn: 0.4479254	total: 2.31s	remaining: 2.02s
534:	learn: 0.4476999	total: 2.32s	remaining: 2.02s
535:	learn: 0.4473616	total: 2.32s	remaining: 2.01s
536:	learn: 0.4470437	total: 2.33s	remaining: 2.01s
537:	learn: 0.4467921	total: 2.33s	remaining: 2s
538:	learn: 0.4466213	total: 2.34s	remaining: 2s
539:	learn: 0.4461662	total: 2.34s	remaining: 2s
540:	learn: 0.4459241	total: 2.35s	remaining: 1.99s
541:	learn: 0.4457372	total: 2.35s	remaining: 1.99s
542:	learn: 0.4454075	total: 2.36s	remaining: 1.99s
543:	learn: 0.4451043	total: 2.36s	remaining: 1.98s
544:	learn: 0.4448202	total: 2.37s	remaining: 1.98s
545:	learn: 0.4446059	total: 2.37s	remaining: 1.97s
546:	learn: 0.4443535	total: 2.38s	remaining: 1.97s
547:	learn: 0.4440661	total: 2.38s	remaining: 1.96s
548:	learn: 0.4437775	total: 2.39s	remaining: 1.96s
549:	learn: 0.4434756	total: 2.39s	remaining: 1.96s
550:	learn: 0.4431816	

690:	learn: 0.4073456	total: 3.02s	remaining: 1.35s
691:	learn: 0.4070897	total: 3.02s	remaining: 1.35s
692:	learn: 0.4068541	total: 3.03s	remaining: 1.34s
693:	learn: 0.4065709	total: 3.03s	remaining: 1.34s
694:	learn: 0.4063280	total: 3.04s	remaining: 1.33s
695:	learn: 0.4060685	total: 3.04s	remaining: 1.33s
696:	learn: 0.4058827	total: 3.04s	remaining: 1.32s
697:	learn: 0.4056203	total: 3.05s	remaining: 1.32s
698:	learn: 0.4054143	total: 3.05s	remaining: 1.31s
699:	learn: 0.4051250	total: 3.06s	remaining: 1.31s
700:	learn: 0.4049288	total: 3.06s	remaining: 1.31s
701:	learn: 0.4046835	total: 3.07s	remaining: 1.3s
702:	learn: 0.4045380	total: 3.07s	remaining: 1.3s
703:	learn: 0.4043105	total: 3.08s	remaining: 1.29s
704:	learn: 0.4040073	total: 3.08s	remaining: 1.29s
705:	learn: 0.4036820	total: 3.09s	remaining: 1.29s
706:	learn: 0.4034605	total: 3.09s	remaining: 1.28s
707:	learn: 0.4032838	total: 3.1s	remaining: 1.28s
708:	learn: 0.4031345	total: 3.1s	remaining: 1.27s
709:	learn: 0.40

851:	learn: 0.3725990	total: 3.73s	remaining: 647ms
852:	learn: 0.3724124	total: 3.73s	remaining: 643ms
853:	learn: 0.3722080	total: 3.73s	remaining: 639ms
854:	learn: 0.3720236	total: 3.74s	remaining: 634ms
855:	learn: 0.3718311	total: 3.74s	remaining: 630ms
856:	learn: 0.3715920	total: 3.75s	remaining: 625ms
857:	learn: 0.3713848	total: 3.75s	remaining: 621ms
858:	learn: 0.3710992	total: 3.75s	remaining: 616ms
859:	learn: 0.3709168	total: 3.76s	remaining: 612ms
860:	learn: 0.3706121	total: 3.76s	remaining: 608ms
861:	learn: 0.3703325	total: 3.77s	remaining: 603ms
862:	learn: 0.3701592	total: 3.77s	remaining: 599ms
863:	learn: 0.3699758	total: 3.78s	remaining: 595ms
864:	learn: 0.3697322	total: 3.78s	remaining: 590ms
865:	learn: 0.3695286	total: 3.79s	remaining: 586ms
866:	learn: 0.3693744	total: 3.79s	remaining: 581ms
867:	learn: 0.3691823	total: 3.79s	remaining: 577ms
868:	learn: 0.3689789	total: 3.8s	remaining: 573ms
869:	learn: 0.3688096	total: 3.8s	remaining: 568ms
870:	learn: 0.

In [54]:
params_CatBoost= {
                  'depth': [3,5,10],
                  'learning_rate' : [0.01,0.1,1],
                  'iterations' : [5,10,50,100]
                 }

gs_cat_cc_ada= GridSearchCV(estimator= cat_cc_ada, param_grid= params_CatBoost, cv=5, scoring='precision', n_jobs=-1, verbose=2)
gs_cat_cc_ada.fit(X_cc_ada, y_cc_ada)

print("Best Parameters:", gs_cat_cc_ada.best_params_)
print("Best Precision Score:", gs_cat_cc_ada.best_score_)

tuned_cat_cc_ada= gs_cat_cc_ada.best_estimator_
y_pred_tuned_cat_cc_ada= tuned_cat_cc_ada.predict(X_test)
y_pred_prob_tuned_cat_cc_ada= tuned_cat_cc_ada.predict_proba(X_test)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_cat_cc_ada, labels= tuned_cat_cc_ada.classes_)
print(classification_report(y_test, y_pred_tuned_cat_cc_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_cat_cc_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_cat_cc_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_cat_cc_ada))

Fitting 5 folds for each of 36 candidates, totalling 180 fits
0:	learn: 0.6795459	total: 3.71ms	remaining: 367ms
1:	learn: 0.6776285	total: 7.36ms	remaining: 361ms
2:	learn: 0.6739796	total: 11.5ms	remaining: 371ms
3:	learn: 0.6699861	total: 15.4ms	remaining: 370ms
4:	learn: 0.6543746	total: 19.3ms	remaining: 367ms
5:	learn: 0.6449979	total: 23.4ms	remaining: 367ms
6:	learn: 0.6399701	total: 27.8ms	remaining: 369ms
7:	learn: 0.6336400	total: 32.1ms	remaining: 369ms
8:	learn: 0.6319453	total: 36.1ms	remaining: 365ms
9:	learn: 0.6286404	total: 40.1ms	remaining: 361ms
10:	learn: 0.6223310	total: 44.1ms	remaining: 357ms
11:	learn: 0.6181630	total: 48.9ms	remaining: 359ms
12:	learn: 0.6164651	total: 53.7ms	remaining: 359ms
13:	learn: 0.6146860	total: 58.1ms	remaining: 357ms
14:	learn: 0.6132185	total: 63.3ms	remaining: 359ms
15:	learn: 0.6095065	total: 67.5ms	remaining: 355ms
16:	learn: 0.6087476	total: 71.7ms	remaining: 350ms
17:	learn: 0.6020274	total: 76.4ms	remaining: 348ms
18:	learn: 0

--- Visualisation ---

In [55]:
print('Logistic Regression Precision:', precision_score(y_test, y_pred_lr_cc_ada, zero_division= 0))
print('Decision Tree Precision:', precision_score(y_test, y_pred_dt_cc_ada, zero_division= 0))
print('Tuned Decision Tree Precision:', precision_score(y_test, y_pred_tuned_dt_cc_ada, zero_division= 0))
print('KNeighborsClassifier Precision:', precision_score(y_test, y_pred_knn_cc_ada, zero_division= 0))
print('Tuned KNeighborsClassifier Precision:', precision_score(y_test, y_pred_tuned_knn_cc_ada, zero_division= 0))
print('GaussianNB Precision:', precision_score(y_test, y_pred_nb_cc_ada, zero_division= 0))
print('SVM Precision:', precision_score(y_test, y_pred_svc_cc_ada, zero_division= 0))
print('Random Forest Precision:', precision_score(y_test, y_pred_rf_cc_ada, zero_division= 0))
print('Tuned Random Forest Precision:', precision_score(y_test, y_pred_tuned_rf_cc_ada, zero_division= 0))
print('AdaBoost Precision:', precision_score(y_test, y_pred_ada_cc_ada, zero_division= 0))
print('Tuned AdaBoost Precision:', precision_score(y_test, y_pred_tuned_ada_cc_ada, zero_division= 0))
print('GradientBoosting Precision:', precision_score(y_test, y_pred_grb_cc_ada, zero_division= 0))
print('Tuned GradientBoosting Precision:', precision_score(y_test, y_pred_tuned_grb_cc_ada, zero_division= 0))
print('XGB Precision:', precision_score(y_test, y_pred_xgb_cc_ada, zero_division= 0))
print('Tuned XGB Precision:', precision_score(y_test, y_pred_tuned_xgb_cc_ada, zero_division= 0))
print('LGBM Precision:', precision_score(y_test, y_pred_lgm_cc_ada, zero_division= 0))
print('Tuned LGBM Precision:', precision_score(y_test, y_pred_tuned_lgm_cc_ada, zero_division= 0))
print('CatBoost Precision:', precision_score(y_test, y_pred_cat_cc_ada, zero_division= 0))
print('Tuned CatBoost Precision:', precision_score(y_test, y_pred_tuned_cat_cc_ada, zero_division= 0))

Logistic Regression Precision: 0.35257082896117525
Decision Tree Precision: 0.3612836438923395
Tuned Decision Tree Precision: 0.36493374108053006
KNeighborsClassifier Precision: 0.3590844062947067
Tuned KNeighborsClassifier Precision: 0.36706135629709363
GaussianNB Precision: 0.3624382207578254
SVM Precision: 0.3756345177664975
Random Forest Precision: 0.3765432098765432
Tuned Random Forest Precision: 0.36850519584332536
AdaBoost Precision: 0.36642453591606133
Tuned AdaBoost Precision: 0.3654860587792012
GradientBoosting Precision: 0.3624393624393624
Tuned GradientBoosting Precision: 0.36477987421383645
XGB Precision: 0.3603945371775417
Tuned XGB Precision: 0.3680404916847433
LGBM Precision: 0.36834427425237054
Tuned LGBM Precision: 0.36303871439006574
CatBoost Precision: 0.364314789687924
Tuned CatBoost Precision: 0.35933333333333334


In [56]:
precision_scores= {
                    'Logistic Regression Precision:': precision_score(y_test, y_pred_lr_cc_ada, zero_division= 0),
                    'Decision Tree Precision:': precision_score(y_test, y_pred_dt_cc_ada, zero_division= 0),
                    'Tuned Decision Tree Precision:': precision_score(y_test, y_pred_tuned_dt_cc_ada, zero_division= 0),
                    'KNeighborsClassifier Precision:': precision_score(y_test, y_pred_knn_cc_ada, zero_division= 0),
                    'Tuned KNeighborsClassifier Precision:': precision_score(y_test, y_pred_tuned_knn_cc_ada, zero_division= 0),
                    'GaussianNB Precision:': precision_score(y_test, y_pred_nb_cc_ada, zero_division= 0),
                    'SVM Precision:': precision_score(y_test, y_pred_svc_cc_ada, zero_division= 0),
                    'Random Forest Precision:': precision_score(y_test, y_pred_rf_cc_ada, zero_division= 0),
                    'Tuned Random Forest Precision:': precision_score(y_test, y_pred_tuned_rf_cc_ada, zero_division= 0),
                    'AdaBoost Precision:': precision_score(y_test, y_pred_ada_cc_ada, zero_division= 0),
                    'Tuned AdaBoost Precision:': precision_score(y_test, y_pred_tuned_ada_cc_ada, zero_division= 0),
                    'GradientBoosting Precision:': precision_score(y_test, y_pred_grb_cc_ada, zero_division= 0),
                    'Tuned GradientBoosting Precision:': precision_score(y_test, y_pred_tuned_grb_cc_ada, zero_division= 0),
                    'XGB Precision:': precision_score(y_test, y_pred_xgb_cc_ada, zero_division= 0),
                    'Tuned XGB Precision:': precision_score(y_test, y_pred_tuned_xgb_cc_ada, zero_division= 0),
                    'LGBM Precision:': precision_score(y_test, y_pred_lgm_cc_ada, zero_division= 0),
                    'Tuned LGBM Precision:': precision_score(y_test, y_pred_tuned_lgm_cc_ada, zero_division= 0),
                    'CatBoost Precision:': precision_score(y_test, y_pred_cat_cc_ada, zero_division= 0),
                    'Tuned CatBoost Precision:': precision_score(y_test, y_pred_tuned_cat_cc_ada, zero_division= 0)
                  }

cc_ada_precision= pd.DataFrame(list(precision_scores.items()), columns= ['Model', 'Precision Score'])
cc_ada_precision= cc_ada_precision.sort_values(by= 'Precision Score', ascending=False)
print(cc_ada_precision)

                                    Model  Precision Score
7                Random Forest Precision:         0.376543
6                          SVM Precision:         0.375635
8          Tuned Random Forest Precision:         0.368505
15                        LGBM Precision:         0.368344
14                   Tuned XGB Precision:         0.368040
4   Tuned KNeighborsClassifier Precision:         0.367061
9                     AdaBoost Precision:         0.366425
10              Tuned AdaBoost Precision:         0.365486
2          Tuned Decision Tree Precision:         0.364934
12      Tuned GradientBoosting Precision:         0.364780
17                    CatBoost Precision:         0.364315
16                  Tuned LGBM Precision:         0.363039
11            GradientBoosting Precision:         0.362439
5                   GaussianNB Precision:         0.362438
1                Decision Tree Precision:         0.361284
13                         XGB Precision:         0.3603

In [57]:
accuracy_scores= {
                    'Logistic Regression Accuracy:': accuracy_score(y_test, y_pred_lr_cc_ada),
                    'Decision Tree Accuracy:': accuracy_score(y_test, y_pred_dt_cc_ada),
                    'Tuned Decision Tree Accuracy:': accuracy_score(y_test, y_pred_tuned_dt_cc_ada),
                    'KNeighborsClassifier Accuracy:': accuracy_score(y_test, y_pred_knn_cc_ada),
                    'Tuned KNeighborsClassifier Accuracy:': accuracy_score(y_test, y_pred_tuned_knn_cc_ada),
                    'GaussianNB Accuracy:': accuracy_score(y_test, y_pred_nb_cc_ada),
                    'SVM Accuracy:': accuracy_score(y_test, y_pred_svc_cc_ada),
                    'Random Forest Accuracy:': accuracy_score(y_test, y_pred_rf_cc_ada),
                    'Tuned Random Forest Accuracy:': accuracy_score(y_test, y_pred_tuned_rf_cc_ada),
                    'AdaBoost Accuracy:': accuracy_score(y_test, y_pred_ada_cc_ada),
                    'Tuned AdaBoost Accuracy:': accuracy_score(y_test, y_pred_tuned_ada_cc_ada),
                    'GradientBoosting Accuracy:': accuracy_score(y_test, y_pred_grb_cc_ada),
                    'Tuned GradientBoosting Accuracy:': accuracy_score(y_test, y_pred_tuned_grb_cc_ada),
                    'XGB Accuracy:': accuracy_score(y_test, y_pred_xgb_cc_ada),
                    'Tuned XGB Accuracy:': accuracy_score(y_test, y_pred_tuned_xgb_cc_ada),
                    'LGBM Accuracy:': accuracy_score(y_test, y_pred_lgm_cc_ada),
                    'Tuned LGBM Accuracy:': accuracy_score(y_test, y_pred_tuned_lgm_cc_ada),
                    'CatBoost Accuracy:': accuracy_score(y_test, y_pred_cat_cc_ada),
                    'Tuned CatBoost Accuracy:': accuracy_score(y_test, y_pred_tuned_cat_cc_ada)
                  }

cc_ada_accuracy= pd.DataFrame(list(accuracy_scores.items()), columns= ['Model', 'Accuracy Score'])
cc_ada_accuracy= cc_ada_accuracy.sort_values(by= 'Accuracy Score', ascending=False)
print(cc_ada_accuracy)

                                   Model  Accuracy Score
6                          SVM Accuracy:        0.613805
3         KNeighborsClassifier Accuracy:        0.529378
4   Tuned KNeighborsClassifier Accuracy:        0.500856
2          Tuned Decision Tree Accuracy:        0.490588
1                Decision Tree Accuracy:        0.488876
0          Logistic Regression Accuracy:        0.481460
7                Random Forest Accuracy:        0.459213
8          Tuned Random Forest Accuracy:        0.454079
9                     AdaBoost Accuracy:        0.452938
5                   GaussianNB Accuracy:        0.451226
10              Tuned AdaBoost Accuracy:        0.438106
15                        LGBM Accuracy:        0.435824
14                   Tuned XGB Accuracy:        0.433542
13                         XGB Accuracy:        0.431831
16                  Tuned LGBM Accuracy:        0.427838
12      Tuned GradientBoosting Accuracy:        0.420993
11            GradientBoosting 

In [58]:
auc_scores= {
                    'Logistic Regression AUC:': roc_auc_score(y_test, y_pred_prob_lr_cc_ada),
                    'Decision Tree AUC:': roc_auc_score(y_test, y_pred_prob_dt_cc_ada),
                    'Tuned Decision Tree AUC:': roc_auc_score(y_test, y_pred_prob_tuned_dt_cc_ada),
                    'KNeighborsClassifier AUC:': roc_auc_score(y_test, y_pred_prob_knn_cc_ada),
                    'Tuned KNeighborsClassifier AUC:': roc_auc_score(y_test, y_pred_prob_tuned_knn_cc_ada),
                    'GaussianNB AUC:': roc_auc_score(y_test, y_pred_prob_nb_cc_ada),
                    'SVM AUC:': roc_auc_score(y_test, y_pred_prob_svc_cc_ada),
                    'Random Forest AUC:': roc_auc_score(y_test, y_pred_prob_rf_cc_ada),
                    'Tuned Random Forest AUC:': roc_auc_score(y_test, y_pred_prob_tuned_rf_cc_ada),
                    'AdaBoost AUC:': roc_auc_score(y_test, y_pred_prob_ada_cc_ada),
                    'Tuned AdaBoost AUC:': roc_auc_score(y_test, y_pred_prob_tuned_ada_cc_ada),
                    'GradientBoosting AUC:': roc_auc_score(y_test, y_pred_prob_grb_cc_ada),
                    'Tuned GradientBoosting AUC:': roc_auc_score(y_test, y_pred_prob_tuned_grb_cc_ada),
                    'XGB AUC:': roc_auc_score(y_test, y_pred_prob_xgb_cc_ada),
                    'Tuned XGB AUC:': roc_auc_score(y_test, y_pred_prob_tuned_xgb_cc_ada),
                    'LGBM AUC:': roc_auc_score(y_test, y_pred_prob_lgm_cc_ada),
                    'Tuned LGBM AUC:': roc_auc_score(y_test, y_pred_prob_tuned_lgm_cc_ada),
                    'CatBoost AUC:': roc_auc_score(y_test, y_pred_prob_cat_cc_ada),
                    'Tuned CatBoost AUC:': roc_auc_score(y_test, y_pred_prob_tuned_cat_cc_ada)
                  }

cc_ada_auc= pd.DataFrame(list(auc_scores.items()), columns= ['Model', 'AUC Score'])
cc_ada_auc= cc_ada_auc.sort_values(by= 'AUC Score', ascending=False)
print(cc_ada_auc)

                              Model  AUC Score
7                Random Forest AUC:   0.520934
8          Tuned Random Forest AUC:   0.520545
12      Tuned GradientBoosting AUC:   0.509321
15                        LGBM AUC:   0.509146
3         KNeighborsClassifier AUC:   0.505349
5                   GaussianNB AUC:   0.504887
1                Decision Tree AUC:   0.503644
13                         XGB AUC:   0.503323
18              Tuned CatBoost AUC:   0.503155
9                     AdaBoost AUC:   0.501863
10              Tuned AdaBoost AUC:   0.501655
2          Tuned Decision Tree AUC:   0.500700
4   Tuned KNeighborsClassifier AUC:   0.500349
6                          SVM AUC:   0.499387
14                   Tuned XGB AUC:   0.497238
0          Logistic Regression AUC:   0.497139
16                  Tuned LGBM AUC:   0.496544
11            GradientBoosting AUC:   0.495326
17                    CatBoost AUC:   0.491592
