In [1]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import warnings

warnings.filterwarnings("ignore")

In [2]:
# Location of the input CSV, relative to the directory the notebook runs from.
DATA_PATH = "../Data/Preprocessed_LCData.csv"

# Read the whole file into a DataFrame with pandas' default parsing options.
df = pd.read_csv(DATA_PATH)

In [3]:
# XGBoost requires class labels to be consecutive integers starting from 0,
# so the grades 1..7 are shifted down to 0..6.
grade_map = {grade: grade - 1 for grade in range(1, 8)}
grade_encoded = df['grade'].map(grade_map)

# Series.map() silently turns every value that is not a key of the mapping into
# NaN. That also happens when this cell is executed a second time on labels that
# are already encoded (0 is not a key), so stop here with a clear error instead
# of handing corrupted targets to the split and the model.
unmapped = grade_encoded.isna()
if unmapped.any():
    offending = df.loc[unmapped, 'grade'].unique().tolist()
    raise ValueError(
        "'grade' holds values outside 1-7 (missing, or this cell was already run): "
        f"{offending}"
    )

# Every value was mapped, so the column can safely be stored as integers.
df['grade'] = grade_encoded.astype('int64')

In [4]:
# Split the frame into model inputs and target.
# The target is the encoded grade; both 'grade' and 'default_flag' are removed
# from the inputs, so neither of them is available to the model as a feature.
target_col = 'grade'
y = df[target_col]
X = df.drop(columns=['default_flag', target_col])  # default_flag omitted

In [5]:
# Hold out 30% of the rows for evaluation. Stratifying on y keeps the class
# proportions equal in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [6]:
# Device XGBoost trains on. 'cuda' needs a CUDA-capable GPU and a GPU-enabled
# XGBoost build; change this single value to 'cpu' to run the notebook on a
# machine without one.
XGB_DEVICE = 'cuda'

# Base multiclass classifier; the grid search clones it once per candidate.
xgb_grade = XGBClassifier(
    tree_method='hist',         # histogram-based tree construction; runs on whichever device is selected
    device=XGB_DEVICE,          # `device` (XGBoost >= 2.0) replaces the deprecated 'gpu_hist' tree method
    # NOTE(review): 'multi:softprob' is the scikit-learn wrapper's usual multiclass
    # objective and is the one meant for predict_proba - consider switching if
    # class probabilities are needed from the saved model.
    objective='multi:softmax',  # multiclass objective that outputs the predicted class
    num_class=7,                # number of grade classes (labels 0..6)
    eval_metric='mlogloss',     # multiclass log loss
    random_state=42
)

In [7]:
# Values tried exhaustively by the grid search: 3 * 3 * 2 * 2 = 36 combinations.
param_grid = dict(
    n_estimators=[100, 200, 250],   # number of boosting rounds
    max_depth=[4, 6, 8],            # maximum depth of each tree
    learning_rate=[0.01, 0.1],      # shrinkage applied to each boosting step
    subsample=[0.8, 1],             # fraction of training rows sampled per tree
)

In [8]:
# Exhaustive search over param_grid: each candidate is scored by accuracy using
# 3-fold cross-validation, and verbose=1 prints the fold/candidate totals.
search_settings = {
    'estimator': xgb_grade,
    'param_grid': param_grid,
    'scoring': 'accuracy',
    'cv': 3,
    'verbose': 1,
}
grid_grade = GridSearchCV(**search_settings)

In [None]:
# Run the search on the training partition only; the test partition is not seen.
# This is the expensive cell: every candidate is trained once per CV fold.
grid_grade.fit(X=X_train, y=y_train)

Fitting 3 folds for each of 36 candidates, totalling 108 fits


In [None]:
# Keep the estimator that the grid search refit with its best-scoring
# parameter combination; all later cells evaluate and save this model.
best_xgb_grade = grid_grade.best_estimator_

In [None]:
# Predict grades for the held-out rows, then summarise the quality of those
# predictions with per-class metrics and the raw confusion counts.
y_pred = best_xgb_grade.predict(X_test)

report_text = classification_report(y_test, y_pred)
confusion_counts = confusion_matrix(y_test, y_pred)

print("Classification Report:\n", report_text)
print("Confusion Matrix:\n", confusion_counts)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# This is a multiclass problem, so the axes must carry one tick label per grade
# class rather than a binary Negative/Positive pair. Taking the labels from the
# fitted model and passing them to confusion_matrix pins the row/column order,
# which guarantees that tick i always describes row/column i.
class_labels = list(best_xgb_grade.classes_)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='BuGn',
            xticklabels=class_labels, yticklabels=class_labels, ax=ax)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
# Serialise the tuned classifier to a file in the current working directory.
MODEL_PATH = 'xgb_grade_model.pkl'
joblib.dump(best_xgb_grade, MODEL_PATH)