In [11]:
import warnings
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import (
                                    StratifiedKFold,
                                    cross_validate
                                )
from sklearn.metrics import (
                            classification_report,
                            confusion_matrix,
                            roc_auc_score,
                            roc_curve,
                            accuracy_score,
                            precision_recall_curve,
                            f1_score,
                            precision_score,
                            recall_score
                        )
warnings.filterwarnings('ignore')

In [12]:
def _load_npz_array(path, key='arr_0'):
    """Return the array stored under ``key`` in the ``.npz`` archive at ``path``.

    ``np.load`` on an ``.npz`` file returns a lazily-read archive object that
    keeps the file open until it is closed, so it is opened in a ``with``
    block here to release the file handle as soon as the array is read.
    """
    with np.load(path) as archive:
        return archive[key]


# Train/test features and labels saved earlier under the default 'arr_0' key.
X_train = _load_npz_array('artifacts/X_train.npz')
Y_train = _load_npz_array('artifacts/Y_train.npz')
X_test = _load_npz_array('artifacts/X_test.npz')
Y_test = _load_npz_array('artifacts/Y_test.npz')

In [13]:
# Six-fold stratified splitter; shuffling is seeded so the folds are
# identical on every run.
cv = StratifiedKFold(n_splits=6, shuffle=True, random_state=42)

In [14]:
# Random forest classifier (the `model_lr` name is kept because the
# cross-validation cell refers to it).
# random_state is fixed so that bootstrap sampling and feature selection are
# repeatable, matching the seeded StratifiedKFold splitter; without it the
# reported scores change on every run.
model_lr = RandomForestClassifier(
    n_estimators=300,
    criterion='entropy',
    max_depth=50,
    random_state=42,
)

In [15]:
# Cross-validate the model on the training split, scoring each fold by F1.
# The fitted per-fold estimators and the train-side scores are kept in the
# returned dict alongside the validation scores.
cv_results = cross_validate(
    estimator=model_lr,
    X=X_train,
    y=Y_train,
    scoring='f1',
    cv=cv,
    return_train_score=True,
    return_estimator=True,
)

In [16]:
# Keep the estimator from the fold with the highest validation F1.
# NOTE(review): each fold estimator was fitted on only part of X_train, and
# picking the best fold is optimistic; consider refitting on the full
# training set instead.
test_scores = cv_results['test_score']
best_index = test_scores.argmax()
estimator = cv_results['estimator'][best_index]

In [17]:
# Predict labels for the held-out test features with the selected fold estimator.
Y_hat_test = estimator.predict(X_test)

In [18]:
# Score the held-out predictions: every metric compares the true labels
# Y_test against the predicted labels Y_hat_test.
f1 = f1_score(y_true=Y_test, y_pred=Y_hat_test)
recall = recall_score(y_true=Y_test, y_pred=Y_hat_test)
precision = precision_score(y_true=Y_test, y_pred=Y_hat_test)
accuracy = accuracy_score(y_true=Y_test, y_pred=Y_hat_test)

# Report the metrics, one per line.
print(f'Accuracy : {accuracy}')
print(f'precision : {precision}')
print(f'recall : {recall}')
print(f'f1_score : {f1}')

Accuracy : 0.7707594038325053
precision : 0.5637755102040817
recall : 0.5924932975871313
f1_score : 0.5777777777777777
