In [1]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef
)
from utils.UtilityFunctions import save_model, write_json_to_file
#-- Read Test and Train Files Generated in DataPrep.ipynb -------------------------
X_train = pd.read_csv("../data/split/X_train.csv")
X_test = pd.read_csv("../data/split/X_test.csv")  
y_train = pd.read_csv("../data/split/y_train.csv")
y_test = pd.read_csv("../data/split/y_test.csv")

# Number of classes
num_classes = len(set(y_train))

# -----------------------------
# Initialize XGBoost
# -----------------------------
xgb_model = XGBClassifier(
    objective="multi:softprob",  # required for probabilities
    num_class=num_classes,
    n_estimators=300,
    max_depth=6,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    use_label_encoder=False,
    eval_metric="mlogloss"
)

# -----------------------------
# Train
# -----------------------------
xgb_model.fit(X_train, y_train)

#-- Save the trained model using pickle

trained_model_path="trained_models/XGBoostModel.pkl"
save_model(xgb_model, trained_model_path)

# -----------------------------
# Predict
# -----------------------------
y_pred_xgb = xgb_model.predict(X_test)
y_prob_xgb = xgb_model.predict_proba(X_test)

# -----------------------------
# Evaluation Metrics
# -----------------------------
xgb_metrics = {
    "ML Model Name": "XGBoost (Ensemble)",
    "Accuracy": accuracy_score(y_test, y_pred_xgb),
    "AUC": roc_auc_score(
        y_test,
        y_prob_xgb,
        multi_class="ovr",
        average="macro"
    ),
    "Precision": precision_score(y_test, y_pred_xgb, average="macro"),
    "Recall": recall_score(y_test, y_pred_xgb, average="macro"),
    "F1": f1_score(y_test, y_pred_xgb, average="macro"),
    "MCC": matthews_corrcoef(y_test, y_pred_xgb)
}

xgb_metrics
write_json_to_file(xgb_metrics, "../metrics/XGBoostMetrics.json")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


True