# Final Report - Group 10

### Libaray

In [140]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Preprocessing
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Resampling
from imblearn.over_sampling import SMOTE

# Feature selection
from sklearn.feature_selection import SelectFromModel

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import StackingClassifier

# Tuning
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from hyperopt.pyll import scope

# Evaluation
from sklearn.model_selection import train_test_split
from sklearn.metrics import (confusion_matrix, auc,
                             roc_curve, roc_auc_score, classification_report, f1_score)
from sklearn.model_selection import GridSearchCV


pd.options.display.max_columns = 999
%matplotlib inline
color = sns.color_palette()
sns.set_style('darkgrid')

## Bookmark

<a href=#p0>0. Feature Selection</a>

<a href=#p1>1. Modeling - Baseline</a>

<a href=#p2>2. Modeling - Tuning</a>

<a href=#p3>3. Modeling - Stacking</a>

<a href=#p4>4. Experiment on Feature Selection</a>

## Read data

In [2]:
train = pd.read_csv("train_set.csv",index_col=0)
test = pd.read_csv("test_set.csv",index_col=0)

In [3]:
unique, counts = np.unique(train.y, return_counts=True)
cnt_dict = dict(zip(unique, counts/train.shape[0]))
pd.DataFrame(list(cnt_dict.items()), columns=['Class', 'Percentage'])

Unnamed: 0,Class,Percentage
0,0,0.887343
1,1,0.112657


In [4]:
def drop_cols(df):
    cols = ['pdays','previous']
    df.drop(cols,axis=1,inplace=True)
    return df

In [5]:
train = drop_cols(train)
test = drop_cols(test)

In [6]:
X_train = train.drop('y',axis=1)
y_train = train['y']
X_test = test.drop('y',axis=1)
y_test = test['y']

print(X_train.shape)
print(X_test.shape)

(28831, 65)
(12357, 65)


<a name='p0' /></a>
## 0. Feature Selection

#### Use multiple models to select key features by importance

In [7]:
models = {
    "Logistic": LogisticRegression(solver='liblinear'),
    
    "RandomForestClassifier":RandomForestClassifier(),

    "XGBooostClassifier": XGBClassifier(),
    
    "LightGBMClassifier": LGBMClassifier()

}


feature_selection = {}
feature_names = np.array(X_train.columns)

for model_name in models:
    model = models[model_name]
    selector = SelectFromModel(estimator=model,threshold=-1, max_features=30).fit(X_train, y_train)
    feature_selection[model_name] = feature_names[selector.get_support()]
    print("{}{}{}".format(model_name, ": ", 'done'))

Logistic: done
RandomForestClassifier: done
XGBooostClassifier: done
LightGBMClassifier: done


#### select features using model votes >=2

In [8]:
feature_count = dict(zip(feature_names,np.zeros(len(feature_names))))
for i in feature_names:
    for j in feature_selection:
        if i in feature_selection[j]:
            feature_count[i]+=1
feature_count = pd.Series(feature_count).sort_values(ascending=False)

In [12]:
feature_count = dict(zip(feature_names,np.zeros(len(feature_names))))
for i in feature_names:
    for j in feature_selection:
        if i in feature_selection[j]:
            feature_count[i]+=1
feature_count = pd.Series(feature_count).sort_values(ascending=False)

# select by feature count
features_selected = feature_count[feature_count>=2].index.tolist()
features_selected

['education',
 'poutcome_success',
 'duration',
 'emp.var.rate',
 'cons.price.idx',
 'cons.conf.idx',
 'euribor3m',
 'nr.employed',
 'DJI_monthly_adjclose',
 'job_1.0',
 'CPI_DJI',
 'campaign',
 'age_bin',
 'month_oct',
 'poutcome_nonexistent',
 'day_of_week_mon',
 'day_of_week_fri',
 'month_may',
 'contact',
 'job_0.0',
 'kmeans_avgy_5',
 'marital_2.0',
 'class_kmeans_4',
 'effect_euribor3m',
 'marital_1.0',
 'pdays_999_or_not',
 'month_mar',
 'month_sep',
 'job_9.0',
 'day_of_week_thu',
 'day_of_week_tue',
 'day_of_week_wed',
 'liability',
 'education_cluster_avgy',
 'previous_zero_or_not',
 'job_5.0',
 'housing']

In [7]:
features_selected=['education',
 'poutcome_success',
 'duration',
 'emp.var.rate',
 'cons.price.idx',
 'cons.conf.idx',
 'euribor3m',
 'nr.employed',
 'DJI_monthly_adjclose',
 'job_1.0',
 'CPI_DJI',
 'campaign',
 'age_bin',
 'month_oct',
 'poutcome_nonexistent',
 'day_of_week_mon',
 'day_of_week_fri',
 'month_may',
 'contact',
 'job_0.0',
 'kmeans_avgy_5',
 'marital_2.0',
 'class_kmeans_4',
 'effect_euribor3m',
 'marital_1.0',
 'pdays_999_or_not',
 'month_mar',
 'month_sep',
 'job_9.0',
 'day_of_week_thu',
 'day_of_week_tue',
 'day_of_week_wed',
 'liability',
 'education_cluster_avgy',
 'previous_zero_or_not',
 'job_5.0',
 'housing']

In [9]:
X_train_selected = train[features_selected]
X_test_selected = test[features_selected]

<a name='p1' /></a>
## 1. Modeling - Baseline

### SMOTE

In [13]:
smote = SMOTE()
X_train_selected_sm ote, y_train_smote = smote.fit_resample(X_train_selected, y_train)

In [62]:
def Clf_train(classifier):    
    # classifier
    clf = classifier
    # fit data
    clf.fit(X_train_selected_smote, y_train_smote)
    # get predictions
    y_pred = clf.predict(X_test_selected) # get predicted class
    y_proba = clf.predict_proba(X_test_selected) # get predicted prob.
        
    # generate confusion matrix
    cm = pd.DataFrame(confusion_matrix(y_true=y_test, y_pred=y_pred), \
                      index=['Actual: 0', 'Actual: 1'], \
                      columns=['Pred: 0', 'Pred: 1'])
    
    # auc
    fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=y_proba[:,1])
    auc_score = auc(fpr, tpr)
    
    # f1-score
    f1 = f1_score(y_true=y_test, y_pred=y_pred, average='macro')
    
    return cm, auc_score, f1

### Baseline - Naive Bayesian

In [63]:
nb = GaussianNB()

# build model
cm_nb, auc_nb, f1_nb = Clf_train(nb)

print("Confusion Matrix: \n\n", cm_nb)
print("\n")
print(f"AUC Score: {auc_nb}")
print(f"f1 Score: {f1_nb}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0     7663     3302
Actual: 1      161     1231


AUC Score: 0.8986973966277234
f1 Score: 0.6156087287808447


###  Logistic Regression

In [64]:
lr = LogisticRegression(solver='lbfgs', max_iter=100)

# build model
cm_lr, auc_lr, f1_lr = Clf_train(lr)

print("Confusion Matrix: \n\n", cm_lr)
print("\n")
print(f"AUC Score: {auc_lr}")
print(f"f1 Score: {f1_lr}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0     8950     2015
Actual: 1      458      934


AUC Score: 0.8455806353549171
f1 Score: 0.6544647235756663


###  Random Forest

In [65]:
rf = RandomForestClassifier()

# build model
cm_rf, auc_rf, f1_rf = Clf_train(rf)

print("Confusion Matrix: \n\n", cm_rf)
print("\n")
print(f"AUC Score: {auc_rf}")
print(f"f1 Score: {f1_rf}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0    10480      485
Actual: 1      632      760


AUC Score: 0.9381300087530333
f1 Score: 0.7629084737702868


### XGBoost

In [66]:
xgb = XGBClassifier()

# build model
cm_xgb, auc_xgb, f1_xgb = Clf_train(xgb)

print("Confusion Matrix: \n\n", cm_xgb)
print("\n")
print(f"AUC Score: {auc_xgb}")
print(f"f1 Score: {f1_xgb}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0    10506      459
Actual: 1      628      764


AUC Score: 0.9446283498697527
f1 Score: 0.7675667388287557


### LightGBM

In [67]:
lgb = LGBMClassifier()

# build model
cm_lgb, auc_lgb, f1_lgb = Clf_train(lgb)

print("Confusion Matrix: \n\n", cm_lgb)
print("\n")
print(f"AUC Score: {auc_lgb}")
print(f"f1 Score: {f1_lgb}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0    10507      458
Actual: 1      565      827


AUC Score: 0.9476461153828011
f1 Score: 0.7857169409907938


<a name='p2' /></a>
## 2. Modeling - Tuning


#### NB

In [50]:
nb_classifier = GaussianNB()

params_NB = {'var_smoothing': np.logspace(0,-9, num=100)}

gs_NB = GridSearchCV(estimator=nb_classifier, 
                 param_grid=params_NB, 
                 cv=5,
                 n_jobs = -1, 
                 scoring='roc_auc') 

gs_NB.fit(X_train_selected_smote, y_train_smote)

gs_NB.best_params_

{'var_smoothing': 1.873817422860387e-09}

In [70]:
nb = GaussianNB(var_smoothing=1.873817422860387e-09)

# build model
cm_nb, auc_nb, f1_nb = Clf_train(nb)

print("Confusion Matrix: \n\n", cm_nb)
print("\n")
print(f"AUC Score: {auc_nb}")
print(f"f1 Score: {f1_nb}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0     7647     3318
Actual: 1      145     1247


AUC Score: 0.9002948907443222
f1 Score: 0.6170213537155966


#### LR

In [53]:
lr_classifier = LogisticRegression()

params_LR = {
            'solver': ['liblinear'],
            'penalty': ['l2','l1'],
            'C': [100, 10, 1.0, 0.1, 0.01],
            'max_iter':[1000]}

gs_LR = GridSearchCV(estimator=lr_classifier, 
                 param_grid=params_LR, 
                 cv=5,
                 n_jobs = -1, 
                 scoring='roc_auc') 

gs_LR.fit(X_train_selected_smote, y_train_smote)

gs_LR.best_params_

{'C': 0.01, 'max_iter': 1000, 'penalty': 'l1', 'solver': 'liblinear'}

In [71]:

lr = LogisticRegression(C= 0.1, max_iter= 1000, penalty= 'l1', solver= 'liblinear')

# build model
cm_lr, auc_lr, f1_lr = Clf_train(lr)

print("Confusion Matrix: \n\n", cm_lr)
print("\n")
print(f"AUC Score: {auc_lr}")
print(f"f1 Score: {f1_lr}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0    10405      560
Actual: 1      619      773


AUC Score: 0.9214486008249866
f1 Score: 0.7568608657957001


####    RF

In [125]:
rf_classifier = RandomForestClassifier()

params_RF = {
     #'n_estimators': [380, 430, 480],
     #'max_depth':[3, 6], 
     #'min_samples_split':[3, 6],
     #'min_samples_leaf':[3, 5],
     #'max_features':[14, 17, 20]
     'n_estimators': [400,500,600],#300,400,500
     'max_depth':[4,8], #3,4 
     'min_samples_split':[2,3], #2,3
     'min_samples_leaf':[4,5],#4,5
     'max_features':[14,16] #14,16
}

gs_RF = GridSearchCV(estimator=rf_classifier, 
                 param_grid=params_RF, 
                 cv=5,
                 n_jobs = -1, 
                 scoring='roc_auc') 

gs_RF.fit(X_train_selected_smote, y_train_smote)

gs_RF.best_params_

{'max_depth': 8,
 'max_features': 14,
 'min_samples_leaf': 5,
 'min_samples_split': 2,
 'n_estimators': 600}

In [129]:
rf_para = gs_RF.best_params_

rf = RandomForestClassifier(
    n_estimators = rf_para['n_estimators'],
    max_depth = rf_para['max_depth'],
    #min_samples_split = rf_para['min_samples_split'],
    #min_samples_leaf = rf_para['min_samples_leaf'],
    max_features = rf_para['max_features']
)

# build model
cm_rf, auc_rf, f1_rf = Clf_train(rf)

print("Confusion Matrix: \n\n", cm_rf)
print("\n")
print(f"AUC Score: {auc_rf}")
print(f"f1 Score: {f1_rf}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0     9788     1177
Actual: 1      230     1162


AUC Score: 0.9416742666058671
f1 Score: 0.7779175118891475


#### XGB

In [108]:
xgb_classifier = XGBClassifier(eta = 0.1)

params_XGB = {
    'reg_alpha':[1e-5,1e-2],
    'subsample':[0.5,0.8],
    'colsample_bytree':[0.5,0.8],
    'gamma':[1.3],
    'max_depth':[4,8,10],
    'min_child_weight':[1,3],
    'n_estimators': [120,150]  
}

gs_XGB = GridSearchCV(estimator=xgb_classifier, 
                 param_grid=params_XGB, 
                 cv=5,
                 n_jobs = -1, 
                 scoring='roc_auc') 

gs_XGB.fit(X_train_selected_smote, y_train_smote)

xgb_para = gs_XGB.best_params_

In [119]:
xgb_para

{'colsample_bytree': 0.5,
 'gamma': 1,
 'max_depth': 10,
 'min_child_weight': 1,
 'n_estimators': 120,
 'reg_alpha': 0.01,
 'subsample': 0.5}

In [118]:
xgb = XGBClassifier(
    eta = 0.1,
    n_estimators = xgb_para['n_estimators'],
    #max_depth = xgb_para['max_depth'],
    #min_child_weight = xgb_para['min_child_weight'],
    gamma = xgb_para['gamma'],
    subsample = xgb_para['subsample'],
    colsample_bytree = xgb_para['colsample_bytree'],
    reg_alpha = xgb_para['reg_alpha']
)

# build model
cm_xgb,auc_xgb,f1_xgb = Clf_train(xgb)

print("Confusion Matrix: \n\n", cm_xgb)
print("\n")
print(f"AUC Score: {auc_xgb}")
print(f"f1 Score: {f1_xgb}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0    10483      482
Actual: 1      591      801


AUC Score: 0.9450005503404248
f1 Score: 0.7750960425719402


#### LGBM

In [136]:
lgbm_classifier = LGBMClassifier()

params_lgbm = {
    'learning_rate' : [0.05,0.1],
    #'max_depth':[6,10],
    #'min_child_samples': [23, 26],
    #'feature_fraction': [0.3, 0.4],
    #'num_leaves': [100,150],
    #'subsample': [0.4, 0.8], 
    #'colsample_bytree': [0.4, 0.6],
    'reg_alpha':[0.4,0.7],
    'reg_lambda':[0.4,0.7]

}

gs_lgbm = GridSearchCV(estimator=lgbm_classifier, 
                 param_grid=params_lgbm, 
                 cv=5,
                 n_jobs = -1, 
                 scoring='roc_auc') 

gs_lgbm.fit(X_train_selected_smote, y_train_smote)

lgbm_para = gs_lgbm.best_params_
lgbm_para

{'learning_rate': 0.1, 'reg_alpha': 0.4, 'reg_lambda': 0.7}

In [137]:
lgbm = LGBMClassifier(
    learning_rate = lgbm_para['learning_rate'],
    #colsample_bytree = lgbm_para['colsample_bytree'],
    #feature_fraction = lgbm_para['feature_fraction'],
    #num_leaves = lgbm_para['num_leaves'],
    #subsample = lgbm_para['subsample'],
    #max_depth = lgbm_para['max_depth'],
    #min_child_samples = lgbm_para['min_child_samples'],
    reg_alpha = lgbm_para['reg_alpha'],
    reg_lambda = lgbm_para['reg_lambda']
)

cm_lgbm, auc_lgbm,f1_lgbm = Clf_train(lgbm)

print("Confusion Matrix: \n\n", cm_lgbm)
print("\n")
print(f"AUC Score: {auc_lgbm}")
print(f"f1 Score: {f1_lgbm}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0    10489      476
Actual: 1      552      840


AUC Score: 0.9479568611726967
f1 Score: 0.7868347574335508


<a name='p3' /></a>
## 3. Modeling - Stacking

In [141]:
# compare ensemble to each baseline classifier
# get a stacking ensemble of models
def get_model():
    # define the level-0 base models with tuned paramters
    level0 = []
    level0.append(('lr', lr))
    level0.append(('nb', nb))
    level0.append(('rf', rf))
    level0.append(('xgb', xgb))
    level0.append(('lgbm', lgbm))
    # define level-1 meta learner model
    level1 = LogisticRegression()
    # define the stacking ensemble
    model = StackingClassifier(estimators=level0, final_estimator=level1)
    return model

In [142]:
stacking_model = get_model()
#stack = stacking_model.fit(X_train_smote, y_train_smote)
cm_stk, auc_stk, f1_stk = Clf_train(stacking_model)

print("Confusion Matrix: \n\n", cm_stk)
print("\n")
print(f"AUC Score: {auc_stk}")
print(f"f1 Score: {f1_stk}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0    10073      892
Actual: 1      312     1080


AUC Score: 0.9436705609803396
f1 Score: 0.7928496520535662


<a name='p4' /></a>
## 4. Experiment on Feature Selection

#### select features using model votes >=3

In [155]:
# select by feature count
features_selected_vote_3 = feature_count[feature_count>=3].index.tolist()
features_selected_vote_3

['education',
 'poutcome_success',
 'duration',
 'emp.var.rate',
 'cons.price.idx',
 'cons.conf.idx',
 'euribor3m',
 'nr.employed',
 'DJI_monthly_adjclose',
 'job_1.0',
 'CPI_DJI',
 'campaign',
 'age_bin',
 'month_oct',
 'poutcome_nonexistent',
 'day_of_week_mon',
 'day_of_week_fri',
 'month_may',
 'contact',
 'job_0.0',
 'kmeans_avgy_5',
 'marital_2.0',
 'class_kmeans_4',
 'effect_euribor3m',
 'marital_1.0']

In [156]:
features_selected_vote_3 = ['education',
 'poutcome_success',
 'duration',
 'emp.var.rate',
 'cons.price.idx',
 'cons.conf.idx',
 'euribor3m',
 'nr.employed',
 'DJI_monthly_adjclose',
 'job_1.0',
 'CPI_DJI',
 'campaign',
 'age_bin',
 'month_oct',
 'poutcome_nonexistent',
 'day_of_week_mon',
 'day_of_week_fri',
 'month_may',
 'contact',
 'job_0.0',
 'kmeans_avgy_5',
 'marital_2.0',
 'class_kmeans_4',
 'effect_euribor3m',
 'marital_1.0']

X_train_selected_vote_3 = train[features_selected_vote_3]
X_test_selected_vote_3 = test[features_selected_vote_3]

#### select features using model votes >=4

In [157]:
# select by feature count
features_selected_vote_4 = feature_count[feature_count>=4].index.tolist()
features_selected_vote_4

['education',
 'poutcome_success',
 'duration',
 'emp.var.rate',
 'cons.price.idx',
 'cons.conf.idx',
 'euribor3m',
 'nr.employed',
 'DJI_monthly_adjclose',
 'job_1.0',
 'CPI_DJI',
 'campaign']

In [171]:
features_selected_vote_4 = ['education',
 'poutcome_success',
 'duration',
 'emp.var.rate',
 'cons.price.idx',
 'cons.conf.idx',
 'euribor3m',
 'nr.employed',
 'DJI_monthly_adjclose',
 'job_1.0',
 'CPI_DJI',
 'campaign']

X_train_selected_vote_4 = train[features_selected_vote_4]
X_test_selected_vote_4 = test[features_selected_vote_4]

#### Best Model with selected feature (vote=3)

In [159]:
smote = SMOTE()
X_train_selected_vote_3_smote, y_train_smote = smote.fit_resample(X_train_selected_vote_3, y_train)

In [160]:
def Clf_train_vote3(classifier):    
    # classifier
    clf = classifier
    # fit data
    clf.fit(X_train_selected_vote_3_smote, y_train_smote)
    # get predictions
    y_pred = clf.predict(X_test_selected_vote_3) # get predicted class
    y_proba = clf.predict_proba(X_test_selected_vote_3) # get predicted prob.
        
    # generate confusion matrix
    cm = pd.DataFrame(confusion_matrix(y_true=y_test, y_pred=y_pred), \
                      index=['Actual: 0', 'Actual: 1'], \
                      columns=['Pred: 0', 'Pred: 1'])
    
    # auc
    fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=y_proba[:,1])
    auc_score = auc(fpr, tpr)
    
    # f1-score
    f1 = f1_score(y_true=y_test, y_pred=y_pred, average='macro')
    
    return cm, auc_score, f1

In [161]:
lgbm = LGBMClassifier(
    learning_rate = lgbm_para['learning_rate'],
    #colsample_bytree = lgbm_para['colsample_bytree'],
    #feature_fraction = lgbm_para['feature_fraction'],
    #num_leaves = lgbm_para['num_leaves'],
    #subsample = lgbm_para['subsample'],
    #max_depth = lgbm_para['max_depth'],
    #min_child_samples = lgbm_para['min_child_samples'],
    reg_alpha = lgbm_para['reg_alpha'],
    reg_lambda = lgbm_para['reg_lambda']
)

cm_lgbm, auc_lgbm,f1_lgbm = Clf_train_vote3(lgbm)

print("Confusion Matrix: \n\n", cm_lgbm)
print("\n")
print(f"AUC Score: {auc_lgbm}")
print(f"f1 Score: {f1_lgbm}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0    10213      752
Actual: 1      419      973


AUC Score: 0.941425565147203
f1 Score: 0.7850488805725209


#### Best Model with selected feature (vote=4)

In [172]:
smote = SMOTE()
X_train_selected_vote_4_smote, y_train_vote_4_smote = smote.fit_resample(X_train_selected_vote_4, y_train)

In [173]:
def Clf_train_vote4(classifier):    
    # classifier
    clf = classifier
    # fit data
    clf.fit(X_train_selected_vote_4_smote, y_train_smote)
    # get predictions
    y_pred = clf.predict(X_test_selected_vote_4) # get predicted class
    y_proba = clf.predict_proba(X_test_selected_vote_4) # get predicted prob.
        
    # generate confusion matrix
    cm = pd.DataFrame(confusion_matrix(y_true=y_test, y_pred=y_pred), \
                      index=['Actual: 0', 'Actual: 1'], \
                      columns=['Pred: 0', 'Pred: 1'])
    
    # auc
    fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=y_proba[:,1])
    auc_score = auc(fpr, tpr)
    
    # f1-score
    f1 = f1_score(y_true=y_test, y_pred=y_pred, average='macro')
    
    return cm, auc_score, f1

In [174]:
lgbm = LGBMClassifier(
    learning_rate = lgbm_para['learning_rate'],
    #colsample_bytree = lgbm_para['colsample_bytree'],
    #feature_fraction = lgbm_para['feature_fraction'],
    #num_leaves = lgbm_para['num_leaves'],
    #subsample = lgbm_para['subsample'],
    #max_depth = lgbm_para['max_depth'],
    #min_child_samples = lgbm_para['min_child_samples'],
    reg_alpha = lgbm_para['reg_alpha'],
    reg_lambda = lgbm_para['reg_lambda']
)

cm_lgbm, auc_lgbm,f1_lgbm = Clf_train_vote4(lgbm)

print("Confusion Matrix: \n\n", cm_lgbm)
print("\n")
print(f"AUC Score: {auc_lgbm}")
print(f"f1 Score: {f1_lgbm}")

Confusion Matrix: 

            Pred: 0  Pred: 1
Actual: 0     9809     1156
Actual: 1      269     1123


AUC Score: 0.9385875775062764
f1 Score: 0.7720519552568631
