In [13]:
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree   import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import (
    StratifiedKFold,
    cross_validate,
    GridSearchCV,
)
from sklearn.metrics import (
    confusion_matrix,
    f1_score,
    precision_score,
    accuracy_score,
    recall_score
)
warnings.filterwarnings('ignore')

In [14]:
X_train = np.load('Artifacts/X_train.npz')['arr_0']
Y_train = np.load('Artifacts/Y_train.npz')['arr_0']
X_test = np.load('Artifacts/X_test.npz')['arr_0']
Y_test = np.load('Artifacts/Y_test.npz')['arr_0']

In [15]:
model_rf = RandomForestClassifier(
    criterion='gini',
    n_estimators=500,
    max_depth=12,
)

model_cat = CatBoostClassifier(
    learning_rate = 0.1,
    l2_leaf_reg = 3,
    iterations = 500,
    depth = 8,
    border_count = 32
)

model_rf.fit(X_train,Y_train)
model_cat.fit(X_train,Y_train)

0:	learn: 0.6439381	total: 5.21ms	remaining: 2.6s
1:	learn: 0.6035205	total: 11ms	remaining: 2.74s
2:	learn: 0.5741011	total: 16.3ms	remaining: 2.69s
3:	learn: 0.5487396	total: 21.5ms	remaining: 2.67s
4:	learn: 0.5301091	total: 28.3ms	remaining: 2.81s
5:	learn: 0.5140700	total: 33.7ms	remaining: 2.78s
6:	learn: 0.5004477	total: 39.2ms	remaining: 2.76s
7:	learn: 0.4894097	total: 44.6ms	remaining: 2.74s
8:	learn: 0.4806407	total: 50ms	remaining: 2.73s
9:	learn: 0.4717440	total: 55.2ms	remaining: 2.7s
10:	learn: 0.4647835	total: 61.6ms	remaining: 2.74s
11:	learn: 0.4569974	total: 67ms	remaining: 2.72s
12:	learn: 0.4502381	total: 72.6ms	remaining: 2.72s
13:	learn: 0.4454040	total: 79.3ms	remaining: 2.75s
14:	learn: 0.4397651	total: 84.7ms	remaining: 2.74s
15:	learn: 0.4348753	total: 90ms	remaining: 2.72s
16:	learn: 0.4307755	total: 95.5ms	remaining: 2.71s
17:	learn: 0.4256385	total: 101ms	remaining: 2.7s
18:	learn: 0.4212151	total: 106ms	remaining: 2.69s
19:	learn: 0.4171346	total: 112ms	r

<catboost.core.CatBoostClassifier at 0x2531c502f50>

In [16]:

Y_hat_test_rf = model_rf.predict(X_test)

Y_hat_test_cat = model_cat.predict(X_test)


In [17]:
accuracy_lr = accuracy_score(Y_test, Y_hat_test_rf)
precision_lr = precision_score(Y_test, Y_hat_test_rf)
recall_lr = recall_score(Y_test, Y_hat_test_rf)
f1_lr = f1_score(Y_test, Y_hat_test_rf)

accuracy_cat = accuracy_score(Y_test, Y_hat_test_cat)
precision_cat = precision_score(Y_test, Y_hat_test_cat)
recall_cat = recall_score(Y_test, Y_hat_test_cat)
f1_cat = f1_score(Y_test, Y_hat_test_cat)

In [18]:
print("Logistic Regrssion:")
print(f"Accuracy: {accuracy_lr:.4f}")
print(f"Precision: {precision_lr:.4f}")
print(f"Recall: {recall_lr:.4f}")
print(f"F1 Score: {f1_lr:.4f}")

print("\nCatBoost:")
print(f"Accuracy: {accuracy_cat:.4f}")
print(f"Precision: {precision_cat:.4f}")
print(f"Recall: {recall_cat:.4f}")
print(f"F1 Score: {f1_cat:.4f}")

Logistic Regrssion:
Accuracy: 0.7715
Precision: 0.5519
Recall: 0.7265
F1 Score: 0.6273

CatBoost:
Accuracy: 0.7743
Precision: 0.5714
Recall: 0.5898
F1 Score: 0.5805
