In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay,roc_auc_score,precision_recall_curve,classification_report,auc
from sklearn.model_selection import learning_curve,cross_val_score,LearningCurveDisplay
import joblib

In [3]:
# Feature matrices for the train and test splits.
x_train = pd.read_parquet('data/x_train.parquet')
x_test = pd.read_parquet('data/x_test.parquet')

# Targets: each label file is read as a DataFrame and the column named
# after the split is selected from it.
y_train_frame = pd.read_parquet('data/y_train.parquet')
y_test_frame = pd.read_parquet('data/y_test.parquet')
y_train = y_train_frame['y_train']
y_test = y_test_frame['y_test']

In [None]:
# Load the persisted classifier from disk (the file name suggests a tuned
# XGBoost model — confirm against the training notebook).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model artefacts that come from a trusted source.
model_path = 'models/XGB_Classifier_best_model.pkl'
best_model = joblib.load(model_path)

In [None]:
# Decision threshold: a sample is labelled 1 when its probability in
# column 1 of predict_proba is at least this value, otherwise 0.
threshold = 0.3

# Column index 1 of predict_proba — presumably the positive class, i.e. the
# second entry of best_model.classes_; verify for this model.
y_pred_proba = best_model.predict_proba(x_test)[:, 1]

# Boolean mask -> 0/1 integer labels.
y_pred = np.greater_equal(y_pred_proba, threshold).astype(int)

In [None]:
# 5-fold cross-validation of the loaded model on the training split, scored
# with macro-averaged F1. n_jobs=-1 asks for all available cores and
# verbose=2 prints per-fold progress.
cv_score = cross_val_score(
    estimator=best_model,
    X=x_train,
    y=y_train,
    scoring='f1_macro',
    cv=5,
    n_jobs=-1,
    verbose=2,
)

# Per-fold scores followed by their mean and standard deviation.
print(
    'F1 score across folds: ',
    cv_score,
)
print(
    'Mean score : ',
    cv_score.mean(),
)
print(
    'Standard deviation : ',
    cv_score.std(),
)

In [None]:
# Confusion matrix of the thresholded test-set predictions against the
# true test labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Render the matrix with the model's class labels on the axes; the colour
# bar is suppressed.
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=best_model.classes_,
)
disp.plot(colorbar=False, cmap='viridis')
plt.show()

In [None]:
# Learning curve: refit the model on increasing training-set sizes with
# 5-fold cross-validation and score each fit with macro-averaged F1.
train_sizes, train_score, test_score = learning_curve(
    best_model, x_train, y_train, cv=5, scoring='f1_macro'
)

# LearningCurveDisplay takes the raw per-fold score matrices of shape
# (n_ticks, n_folds) and computes the mean and spread itself, so the scores
# are passed un-averaged. The constructor has no `label` argument (and the
# original call repeated that keyword, which is a SyntaxError).
disp = LearningCurveDisplay(
    train_sizes=train_sizes,
    train_scores=train_score,
    test_scores=test_score,
    score_name='f1_macro',
)

# The display only draws when plot() is called. score_type='both' is given
# explicitly because the default differs between scikit-learn releases.
disp.plot(score_type='both')

# Rename the legend entries to the intended "Train" / "CV" labels; lines_
# holds the train curve first and the validation curve second.
for line, label in zip(disp.lines_, ('Train', 'CV')):
    line.set_label(label)
disp.ax_.legend()
disp.ax_.set_title('Learning Curve')
plt.show()