In [2]:
# %%
# Install XGBoost if you haven't already
%pip install xgboost

# %%
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Initialize the XGBoost classifier for multiclass classification.
# The objective 'multi:softprob' tells XGBoost to output probabilities for each class.
model_xgb = XGBClassifier(objective='multi:softprob', use_label_encoder=False, eval_metric='mlogloss')

# Train the model using the preprocessed training data.
model_xgb.fit(X_train_preprocessed, y_train)

# Make predictions and obtain class probabilities on the test set.
y_pred_xgb = model_xgb.predict(X_test_preprocessed)
y_proba_xgb = model_xgb.predict_proba(X_test_preprocessed)

# Evaluate the performance with a confusion matrix and a classification report.
print("Confusion Matrix for XGBoost:")
print(confusion_matrix(y_test, y_pred_xgb))
print("\nClassification Report for XGBoost:")
print(classification_report(y_test, y_pred_xgb))


Note: you may need to restart the kernel to use updated packages.


NameError: name 'X_train_preprocessed' is not defined

In [1]:
# %%
from sklearn.calibration import calibration_curve
import matplotlib.pyplot as plt
import numpy as np

# Ensure the cell at index 0 is executed before this cell to define model_xgb, y_test, and y_proba_xgb.

# Select the index corresponding to the 'home_win' class.
home_win_index = np.where(model_xgb.classes_ == 'home_win')[0][0]

# Compute the calibration curve data.
prob_true, prob_pred = calibration_curve((y_test == 'home_win').astype(int), 
                                           y_proba_xgb[:, home_win_index],
                                           n_bins=10)

# Plot the calibration curve.
plt.figure(figsize=(8,6))
plt.plot(prob_pred, prob_true, marker='o', linewidth=1, label='Home Win')
plt.plot([0, 1], [0, 1], linestyle='--', label='Perfect Calibration')
plt.xlabel('Mean Predicted Probability')
plt.ylabel('Fraction of Positives')
plt.title('Calibration Plot for Home Win (XGBoost)')
plt.legend()
plt.show()


NameError: name 'model_xgb' is not defined

In [None]:
# %%
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

# Binarize the test labels for multiclass ROC analysis.
y_test_bin = label_binarize(y_test, classes=model_xgb.classes_)
n_classes = y_test_bin.shape[1]
fpr = dict()
tpr = dict()
roc_auc = dict()

# Plot ROC curves for each class.
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_proba_xgb[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    plt.plot(fpr[i], tpr[i], label=f'ROC curve for {model_xgb.classes_[i]} (area = {roc_auc[i]:0.2f})')
plt.plot([0, 1], [0, 1], 'k--', label='Chance')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves (XGBoost)')
plt.legend(loc="lower right")
plt.show()

# %%
from sklearn.metrics import precision_recall_curve, average_precision_score

# Plot Precision-Recall curves for each class.
for i in range(n_classes):
    precision, recall, _ = precision_recall_curve(y_test_bin[:, i], y_proba_xgb[:, i])
    avg_precision = average_precision_score(y_test_bin[:, i], y_proba_xgb[:, i])
    plt.plot(recall, precision, label=f'Precision-Recall for {model_xgb.classes_[i]} (AP = {avg_precision:0.2f})')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curves (XGBoost)')
plt.legend(loc='best')
plt.show()
