<a href="https://colab.research.google.com/github/GeraldL19/loan_default_prediction/blob/main/Modelling_06_01_24.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Metrics
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay, classification_report, f1_score , fbeta_score

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

#Imbalance
from imblearn.combine import *
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

In [None]:
#Import clean dataset from GoogleDrive
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/clean_data.csv", index_col=0)
df

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length,loan_status
0,24.0,49000.0,RENT,8.0,DEBTCONSOLIDATION,D,4400.0,16.770000,0.09,Y,3.0,1
1,39.0,91992.0,MORTGAGE,0.0,HOMEIMPROVEMENT,A,3350.0,5.420000,0.04,N,11.0,0
2,29.0,55000.0,MORTGAGE,11.0,PERSONAL,B,4000.0,10.990000,0.07,N,8.0,0
3,23.0,48600.0,MORTGAGE,7.0,PERSONAL,B,19000.0,11.360000,0.39,N,4.0,0
4,22.0,50000.0,OWN,4.0,VENTURE,B,16000.0,9.450000,0.32,N,4.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
32403,23.0,55000.0,MORTGAGE,7.0,MEDICAL,A,8000.0,7.490000,0.15,N,4.0,0
32404,22.0,54800.0,OWN,7.0,EDUCATION,A,8000.0,6.697063,0.15,N,2.0,0
32405,34.0,87360.0,MORTGAGE,11.0,PERSONAL,B,12000.0,11.110000,0.14,N,5.0,0
32406,38.0,61000.0,RENT,0.0,EDUCATION,B,1000.0,9.450000,0.02,N,12.0,0


In [None]:
#Encode categorical values into nnumerical values
df = df.replace({'RENT': 0, 'OWN': 1, 'MORTGAGE': 2, 'OTHER': 3})
df = df.replace({'PERSONAL': 0, 'EDUCATION': 1, 'MEDICAL': 2, 'VENTURE': 3, 'HOMEIMPROVEMENT': 4, 'DEBTCONSOLIDATION': 5})
df = df.replace({'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6})
df = df.replace({'Y': 0, 'N': 1})

In [None]:
# List of numerical column
cols_to_norm = ['person_age','person_income','person_emp_length','loan_amnt','loan_int_rate','loan_percent_income','cb_person_cred_hist_length']
# Scaling numerical variables
df[cols_to_norm] = StandardScaler().fit_transform(df[cols_to_norm])

In [None]:
#Split the datset X independent variables and y dependent variable (target)
X = df.drop('loan_status', axis=1)
y = df['loan_status']

# Spot Checking Algorithms

In [None]:
# Split the data into training and testing sets (70/30)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# List of models to try
models = {
    'Logistic Regression' : LogisticRegression(),
    'Naive Bayes' : GaussianNB(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC(),
    'XGBoost': XGBClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'LightGBM': LGBMClassifier()
}

# Initialize a DataFrame to store the results
results_df = pd.DataFrame(columns=['Model', 'Accuracy', 'Recall', 'F1 Score', 'F2 Score'])

# Train and evaluate each model
for model_name, model in models.items():
    print(f"Training and evaluating {model_name}...")

    # Train the model
    model.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    f2 = fbeta_score(y_test, y_pred, beta=2)

    # Append the results to the DataFrame
    results_df = results_df.append({'Model': model_name, 'Accuracy': accuracy, 'Recall': recall, 'F1 Score': f1, 'F2 Score': f2}, ignore_index=True)

# Display the results DataFrame
print("\nResults:")
results_df

Training and evaluating Logistic Regression...
Training and evaluating Naive Bayes...
Training and evaluating Decision Tree...


  results_df = results_df.append({'Model': model_name, 'Accuracy': accuracy, 'Recall': recall, 'F1 Score': f1, 'F2 Score': f2}, ignore_index=True)
  results_df = results_df.append({'Model': model_name, 'Accuracy': accuracy, 'Recall': recall, 'F1 Score': f1, 'F2 Score': f2}, ignore_index=True)
  results_df = results_df.append({'Model': model_name, 'Accuracy': accuracy, 'Recall': recall, 'F1 Score': f1, 'F2 Score': f2}, ignore_index=True)


Training and evaluating Random Forest...


  results_df = results_df.append({'Model': model_name, 'Accuracy': accuracy, 'Recall': recall, 'F1 Score': f1, 'F2 Score': f2}, ignore_index=True)


Training and evaluating SVM...


  results_df = results_df.append({'Model': model_name, 'Accuracy': accuracy, 'Recall': recall, 'F1 Score': f1, 'F2 Score': f2}, ignore_index=True)


Training and evaluating XGBoost...


  results_df = results_df.append({'Model': model_name, 'Accuracy': accuracy, 'Recall': recall, 'F1 Score': f1, 'F2 Score': f2}, ignore_index=True)


Training and evaluating Gradient Boosting...


  results_df = results_df.append({'Model': model_name, 'Accuracy': accuracy, 'Recall': recall, 'F1 Score': f1, 'F2 Score': f2}, ignore_index=True)


Training and evaluating LightGBM...
[LightGBM] [Info] Number of positive: 4887, number of negative: 17798
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001047 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1139
[LightGBM] [Info] Number of data points in the train set: 22685, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.215429 -> initscore=-1.292507
[LightGBM] [Info] Start training from score -1.292507

Results:
                 Model  Accuracy    Recall  F1 Score  F2 Score
0  Logistic Regression  0.847064  0.474330  0.584056  0.512871
1          Naive Bayes  0.801707  0.642435  0.594617  0.622414
2        Decision Tree  0.887175  0.766015  0.754531  0.761380
3        Random Forest  0.931400  0.721036  0.826347  0.759766
4                  SVM  0.886866  0.583371  0.700109  0.625061
5              XGBo

  results_df = results_df.append({'Model': model_name, 'Accuracy': accuracy, 'Recall': recall, 'F1 Score': f1, 'F2 Score': f2}, ignore_index=True)


In [None]:
#Higlight the best performers
styled_df = results_df.style.highlight_max(axis=0, color='yellow', subset=['Accuracy', 'Recall', 'F1 Score', 'F2 Score'])
# Display the results DataFrame
print("\nResults:")
styled_df


Results:


Unnamed: 0,Model,Accuracy,Recall,F1 Score,F2 Score
0,Logistic Regression,0.847064,0.47433,0.584056,0.512871
1,Naive Bayes,0.801707,0.642435,0.594617,0.622414
2,Decision Tree,0.887175,0.766015,0.754531,0.76138
3,Random Forest,0.9314,0.721036,0.826347,0.759766
4,SVM,0.886866,0.583371,0.700109,0.625061
5,XGBoost,0.933457,0.74557,0.835327,0.779054
6,Gradient Boosting,0.924817,0.710586,0.810573,0.747467
7,LightGBM,0.936542,0.734212,0.839699,0.773058


*   Looking at this we can see that XGBoost and LightGBM have the best F2 score at around 77%. Their recall is also pretty similar.
*   Random forest also look promising as it has metrix that are balanced and not far behind the 2 best performer.
*   Decision Tree has the best recall which is important in our case as we aim to predict if an applicant is at risk of behing rejected. Furthere to this DT is easily interpretable which is good if we want to explain to the applicant why his application might be rejected.

We decide to go ahead with this four algorythms to the next stage.

# Spot Checking Resampling methods

We can see from the lo recaal that we have imbalance and bias. This is showed by the loa recall % and the much lower F2 score.

Therefore we nee to explore ways of dealing with the accuracy paradox.

We will experiment with resampling method such as SMOTE, SMOTE and Tomek and algorithm level methods, to help mitigate the bias toward the majority class.

In [None]:
# List of models to try
models = {
    'Random Forest': RandomForestClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'XGBoost': XGBClassifier(),
    'LightGBM': LGBMClassifier()
}

# SMOTE

In [None]:
#Setting SMOTE
over_sampling = SMOTE()

#Applying smote to training set
X_smote, y_smote = over_sampling.fit_resample(X, y)

# Split the data into training and testing sets
X_train_smote, X_test_smote, y_train_smote, y_test_smote = train_test_split(X_smote, y_smote, test_size=0.3, random_state=42)

In [None]:
# Initialize a DataFrame to store the results
results_smote_df = pd.DataFrame(columns=['Model', 'Accuracy', 'Recall', 'F1 Score', 'F2 Score'])

# Train and evaluate each model
for model_name, model in models.items():
    print(f"Training and evaluating {model_name}...")

    # Train the model
    model.fit(X_train_smote, y_train_smote)

    # Make predictions on the test set
    y_pred_smote = model.predict(X_test_smote)

    # Evaluate the model
    accuracy_smote = accuracy_score(y_test_smote, y_pred_smote)
    recall_smote = recall_score(y_test_smote, y_pred_smote)
    f1_smote = f1_score(y_test_smote, y_pred_smote)
    f2_smote = fbeta_score(y_test_smote, y_pred_smote, beta=2)

    # Append the results to the DataFrame
    results_smote_df = results_smote_df.append({'Model': model_name, 'Accuracy': accuracy_smote, 'Recall': recall_smote, 'F1 Score': f1_smote, 'F2 Score': f2_smote}, ignore_index=True)

Training and evaluating Random Forest...


  results_smote_df = results_smote_df.append({'Model': model_name, 'Accuracy': accuracy_smote, 'Recall': recall_smote, 'F1 Score': f1_smote, 'F2 Score': f2_smote}, ignore_index=True)


Training and evaluating Decision Tree...


  results_smote_df = results_smote_df.append({'Model': model_name, 'Accuracy': accuracy_smote, 'Recall': recall_smote, 'F1 Score': f1_smote, 'F2 Score': f2_smote}, ignore_index=True)


Training and evaluating XGBoost...


  results_smote_df = results_smote_df.append({'Model': model_name, 'Accuracy': accuracy_smote, 'Recall': recall_smote, 'F1 Score': f1_smote, 'F2 Score': f2_smote}, ignore_index=True)


Training and evaluating LightGBM...
[LightGBM] [Info] Number of positive: 17741, number of negative: 17707
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001232 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1804
[LightGBM] [Info] Number of data points in the train set: 35448, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500480 -> initscore=0.001918
[LightGBM] [Info] Start training from score 0.001918


  results_smote_df = results_smote_df.append({'Model': model_name, 'Accuracy': accuracy_smote, 'Recall': recall_smote, 'F1 Score': f1_smote, 'F2 Score': f2_smote}, ignore_index=True)


In [None]:
#Higlight the best performers
styled_smote_df = results_smote_df.style.highlight_max(axis=0, color='yellow', subset=['Accuracy', 'Recall', 'F1 Score', 'F2 Score'])
# Display the results DataFrame
print("\nResults:")
styled_smote_df


Results:


Unnamed: 0,Model,Accuracy,Recall,F1 Score,F2 Score
0,Random Forest,0.932794,0.894709,0.929987,0.908494
1,Decision Tree,0.883425,0.885077,0.883387,0.8844
2,XGBoost,0.948657,0.914633,0.946736,0.927209
3,LightGBM,0.947407,0.909091,0.945195,0.923196


# SMOTE and Tomek

In [None]:
#Setting SMOTETomek
smt = SMOTETomek(random_state=42)

#Applying smotetomek to training set
X_smote_tek, y_smote_tek = smt.fit_resample(X, y)

# Split the data into training and testing sets
X_train_tek, X_test_tek, y_train_tek, y_test_tek = train_test_split(X_smote_tek, y_smote_tek, test_size=0.3, random_state=42)

In [None]:
# Initialize a DataFrame to store the results
results_tek_df = pd.DataFrame(columns=['Model', 'Accuracy', 'Recall', 'F1 Score', 'F2 Score'])

# Train and evaluate each model
for model_name, model in models.items():
    print(f"Training and evaluating {model_name}...")

    # Train the model
    model.fit(X_train_tek, y_train_tek)

    # Make predictions on the test set
    y_pred_tek = model.predict(X_test_tek)

    # Evaluate the model
    accuracy_tek= accuracy_score(y_test_tek, y_pred_tek)
    recall_tek = recall_score(y_test_tek, y_pred_tek)
    f1_tek = f1_score(y_test_tek, y_pred_tek)
    f2_tek = fbeta_score(y_test_tek, y_pred_tek, beta=2)

    # Append the results to the DataFrame
    results_tek_df = results_tek_df.append({'Model': model_name, 'Accuracy': accuracy_tek, 'Recall': recall_tek, 'F1 Score': f1_tek, 'F2 Score': f2_tek}, ignore_index=True)

Training and evaluating Random Forest...


  results_tek_df = results_tek_df.append({'Model': model_name, 'Accuracy': accuracy_tek, 'Recall': recall_tek, 'F1 Score': f1_tek, 'F2 Score': f2_tek}, ignore_index=True)


Training and evaluating Decision Tree...


  results_tek_df = results_tek_df.append({'Model': model_name, 'Accuracy': accuracy_tek, 'Recall': recall_tek, 'F1 Score': f1_tek, 'F2 Score': f2_tek}, ignore_index=True)


Training and evaluating XGBoost...


  results_tek_df = results_tek_df.append({'Model': model_name, 'Accuracy': accuracy_tek, 'Recall': recall_tek, 'F1 Score': f1_tek, 'F2 Score': f2_tek}, ignore_index=True)


Training and evaluating LightGBM...
[LightGBM] [Info] Number of positive: 17660, number of negative: 17445
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001226 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1804
[LightGBM] [Info] Number of data points in the train set: 35105, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.503062 -> initscore=0.012249
[LightGBM] [Info] Start training from score 0.012249


  results_tek_df = results_tek_df.append({'Model': model_name, 'Accuracy': accuracy_tek, 'Recall': recall_tek, 'F1 Score': f1_tek, 'F2 Score': f2_tek}, ignore_index=True)


In [None]:
#Higlight the best performers
styled_tek_df = results_tek_df.style.highlight_max(axis=0, color='yellow', subset=['Accuracy', 'Recall', 'F1 Score', 'F2 Score'])
# Display the results DataFrame
print("\nResults:")
styled_tek_df


Results:


Unnamed: 0,Model,Accuracy,Recall,F1 Score,F2 Score
0,Random Forest,0.942306,0.912744,0.939739,0.923354
1,Decision Tree,0.888933,0.8971,0.888414,0.893606
2,XGBoost,0.95447,0.922454,0.952315,0.934171
3,LightGBM,0.953141,0.91679,0.950703,0.930061


# Cost-Sensitive Decision Trees

In [None]:
# Split the data into training and testing sets
X_train_csdt, X_test_csdt, y_train_csdt, y_test_csdt = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# define model
csdt = DecisionTreeClassifier(class_weight='balanced')

In [None]:
# Initialize a DataFrame to store the results
results_tek_df = pd.DataFrame(columns=['Model', 'Accuracy', 'Recall', 'F1 Score', 'F2 Score'])

# Train the model
csdt.fit(X_train_csdt, y_train_csdt)

# Make predictions on the test set
y_pred_csdt = model.predict(X_test_csdt)

# Evaluate the model
accuracy_csdt= accuracy_score(y_test_csdt, y_pred_csdt)
recall_csdt = recall_score(y_test_csdt, y_pred_csdt)
f1_csdt = f1_score(y_test_csdt, y_pred_csdt)
f2_csdt = fbeta_score(y_test_csdt, y_pred_csdt, beta=2)

print(f'Accuracy : {accuracy_csdt}')
print(f'Recall : {recall_csdt}')
print(f'F1 Score : {f1_csdt}')
print(f'F2 Score : {f2_csdt}\n')
print(f'Confusion Matrix:\n{conf_matrix_boost}')

Accuracy : 0.9378792553738559
Recall : 0.7555656519763744
F1 Score : 0.8463104325699745
F2 Score : 0.7894237159403779



# RUSBoostClassifier

In [None]:
from imblearn.ensemble import RUSBoostClassifier

In [None]:
# Split the data into training and testing sets
X_train_rboost, X_test_rboost, y_train_rboost, y_test_rboost = train_test_split(X, y, test_size=0.3, random_state=42)

# Create a base decision tree classifier (you can use any classifier)
base_classifier = DecisionTreeClassifier()

# Create the RUSBoostClassifier
rusboost_classifier = RUSBoostClassifier(base_classifier, random_state=42)

In [None]:
# Train the model
rusboost_classifier.fit(X_train_rboost, y_train_rboost)

# Make predictions on the test set
y_pred_rboost = rusboost_classifier.predict(X_test_rboost)

# Evaluate the model
accuracy_rboost = accuracy_score(y_test_rboost, y_pred_rboost)
recall_rboost = recall_score(y_test_rboost, y_pred_rboost)
f1_rboost = f1_score(y_test_rboost, y_pred_rboost)
f2_rboost = fbeta_score(y_test_rboost, y_pred_rboost, beta=2)
conf_matrix_boost = confusion_matrix(y_test_rboost, y_pred_rboost)

print(f'Accuracy: {accuracy_rboost}')
print(f'Recall : {recall_rboost}')
print(f'F1 Score : {f1_rboost}')
print(f'F2 Score : {f2_rboost}\n')
print(f'Confusion Matrix:\n{conf_matrix_boost}')

Accuracy: 0.8937570708629023
Recall : 0.805542935029532
F1 Score : 0.7744048918977942
F2 Score : 0.7927919871221606

Confusion Matrix:
[[6917  605]
 [ 428 1773]]


# HyperParameter Tuning

We take the best overall performing algorythms and resampling method and use a randomiser to find the best parameters for the model

# RandomSearch

In [None]:
#Setting SMOTETomek
smt = SMOTETomek(random_state=42)

#Applying smotetomek to training set
X_smote_tek, y_smote_tek = smt.fit_resample(X, y)

# Split the data into training and testing sets
X_train_tek, X_test_tek, y_train_tek, y_test_tek = train_test_split(X_smote_tek, y_smote_tek, test_size=0.3, random_state=42)

In [None]:
# Define the hyperparameter grid for RandomizedSearchCV
param_grid = {
    'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.3],
    'n_estimators': [50, 100, 200, 400, 500],
    'max_depth': [3, 5, 7, 9, 11],
    'min_child_weight': [1, 2, 3, 4, 5],
    'subsample': [0.7, 0.8, 0.9, 1.0],
    'colsample_bytree': [0.7, 0.8, 0.9, 1.0],
    'gamma': [0, 0.5, 1, 1.5, 2],
    'reg_alpha': [0, 0.1, 0.5, 1, 10],
    'reg_lambda': [0, 0.1, 0.5, 1, 10]
}

# Create an XGBoost classifier
xgb_classifier = XGBClassifier()

# Create RandomizedSearchCV object
random_search = RandomizedSearchCV(xgb_classifier, param_distributions=param_grid,
                                   n_iter=10, scoring='recall', cv=3, random_state=42)

# Fit the model with hyperparameter tuning
random_search.fit(X_train_tek, y_train_tek)

# Get the best model
best_xgb = random_search.best_estimator_

# Make predictions on the test set
y_pred_tek = best_xgb.predict(X_test_tek)

# Evaluate the model
accuracy_rboost = accuracy_score(y_test_tek, y_pred_tek)
recall_rboost = recall_score(y_test_tek, y_pred_tek)
f1_rboost = f1_score(y_test_tek, y_pred_tek)
f2_rboost = fbeta_score(y_test_tek, y_pred_tek, beta=2)
conf_matrix_boost = confusion_matrix(y_test_tek, y_pred_tek)

print(f'Accuracy: {accuracy_rboost}')
print(f'Recall : {recall_rboost}')
print(f'F1 Score : {f1_rboost}')
print(f'F2 Score : {f2_rboost}\n')
print(f'Confusion Matrix:\n{conf_matrix_boost}')

# Print classification report
print("Classification Report:")
print(classification_report(y_test_tek, y_pred_tek))

Accuracy: 0.9559986706547026
Recall : 0.933378287255563
F1 Score : 0.954357418643133
F2 Score : 0.9416582764156848

Confusion Matrix:
[[7462  168]
 [ 494 6921]]
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.98      0.96      7630
           1       0.98      0.93      0.95      7415

    accuracy                           0.96     15045
   macro avg       0.96      0.96      0.96     15045
weighted avg       0.96      0.96      0.96     15045



In [None]:
random_search.best_params_

{'subsample': 0.7,
 'reg_lambda': 0.5,
 'reg_alpha': 1,
 'n_estimators': 200,
 'min_child_weight': 1,
 'max_depth': 11,
 'learning_rate': 0.2,
 'gamma': 0.5,
 'colsample_bytree': 0.7}

In [None]:
random_search.best_estimator_

#GridSearch

In [None]:
# Define the hyperparameter grid for RandomizedSearchCV
# Define a smaller hyperparameter grid for GridSearchCV
param_grid = {
    'learning_rate': [0.01, 0.1],
    'n_estimators': [100, 200],
    'max_depth': [3, 5],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0],
    'min_child_weight': [1, 3],
    'gamma': [0, 1],
    'reg_alpha': [0, 0.1],
    'reg_lambda': [0, 0.1]
}

# Create an XGBoost classifier
xgb_classifier = XGBClassifier()

# Create GridSearchCV object
grid_search = GridSearchCV(xgb_classifier, param_grid=param_grid,
                           scoring='accuracy', cv=3)

# Fit the model with hyperparameter tuning
grid_search.fit(X_train_tek, y_train_tek)

# Get the best model
best_xgb = grid_search.best_estimator_

# Make predictions on the test set
y_pred_tek = best_xgb.predict(X_test_tek)

# Evaluate the model
accuracy_rboost = accuracy_score(y_test_tek, y_pred_tek)
recall_rboost = recall_score(y_test_tek, y_pred_tek)
f1_rboost = f1_score(y_test_tek, y_pred_tek)
f2_rboost = fbeta_score(y_test_tek, y_pred_tek, beta=2)
conf_matrix_boost = confusion_matrix(y_test_tek, y_pred_tek)

print(f'Accuracy: {accuracy_rboost}')
print(f'Recall : {recall_rboost}')
print(f'F1 Score : {f1_rboost}')
print(f'F2 Score : {f2_rboost}\n')
print(f'Confusion Matrix:\n{conf_matrix_boost}')

# Print classification report
print("Classification Report:")
print(classification_report(y_test_tek, y_pred_tek))

Accuracy: 0.9494184114323696
Recall : 0.9109912339851652
F1 Score : 0.9466750753275874
F2 Score : 0.9249370139117099

Confusion Matrix:
[[7529  101]
 [ 660 6755]]
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      7630
           1       0.99      0.91      0.95      7415

    accuracy                           0.95     15045
   macro avg       0.95      0.95      0.95     15045
weighted avg       0.95      0.95      0.95     15045

