In [None]:
import numpy as np
import xgboost as xgb
from sklearn.metrics import f1_score, classification_report
from sklearn.model_selection import train_test_split

# Define custom objective function
def custom_loss(y_true, y_pred, important_classes=(0, 1), penalty=2.0):
    """Softmax cross-entropy objective with extra penalty between two classes.

    Gradients and Hessians of the multi-class log-loss are computed from the
    raw scores, then the entries for "true class i, predicted class j" are
    scaled by ``penalty`` whenever ``i`` and ``j`` are two different members
    of ``important_classes``.

    XGBoost invokes a custom objective positionally as ``obj(preds, dtrain)``.
    To work both with that convention and with the ``(labels, preds)`` order
    suggested by the parameter names, the argument exposing ``get_label()``
    is treated as the label container regardless of its position.

    Args:
        y_true: Object exposing ``get_label()`` (e.g. ``xgb.DMatrix``), or the
            raw scores when called in XGBoost's ``(preds, dtrain)`` order.
        y_pred: Raw (pre-softmax) scores, flat or ``(n_samples, n_classes)``,
            or the label container when called in XGBoost's order.
        important_classes: Integer class indices whose mutual confusions are
            penalised. NOTE(review): the original code used the names
            ``'A'`` and ``'B'``, which cannot index a NumPy array; the
            default assumes they are encoded as 0 and 1 -- confirm against
            the label encoding.
        penalty: Multiplier applied to the penalised gradient/Hessian entries.

    Returns:
        Tuple ``(grad, hess)`` of flattened arrays, each of length
        ``n_samples * n_classes``.

    Raises:
        IndexError: If a label or an entry of ``important_classes`` is not a
            valid class index.
    """
    # Accept either argument order (see docstring).
    if not hasattr(y_true, "get_label") and hasattr(y_pred, "get_label"):
        y_true, y_pred = y_pred, y_true

    labels = np.asarray(y_true.get_label()).astype(int)
    n_samples = labels.shape[0]

    # Derive the class count from the score array itself; counting the unique
    # labels would be wrong whenever a class is absent from this data set.
    margins = np.asarray(y_pred, dtype=float).reshape(n_samples, -1)
    n_classes = margins.shape[1]

    # Numerically stable softmax (subtract the row maximum before exp).
    probs = np.exp(margins - margins.max(axis=1, keepdims=True))
    probs /= probs.sum(axis=1, keepdims=True)

    # Gradient of the log-loss w.r.t. the raw scores: p - onehot(label).
    # Work on a copy so `probs` stays intact for the Hessian below.
    grad = probs.copy()
    grad[np.arange(n_samples), labels] -= 1.0

    # Row i of the matrix holds the multipliers used when the true class is i.
    penalty_matrix = np.ones((n_classes, n_classes))
    for i in important_classes:
        for j in important_classes:
            if i != j:
                penalty_matrix[i, j] = penalty

    # Look up each sample's multiplier row in one vectorised step.
    weights = penalty_matrix[labels]
    grad *= weights

    # Diagonal Hessian of the softmax log-loss is p * (1 - p); scale it like
    # the gradient and keep it strictly positive so tree splits stay stable.
    hess = np.maximum(weights * probs * (1.0 - probs), 1e-6)

    return grad.flatten(), hess.flatten()

# Define a custom evaluation metric for monitoring performance
def custom_f1_metric(y_true, y_pred):
    """Weighted F1 score usable as an XGBoost custom evaluation metric.

    XGBoost invokes a custom metric positionally as ``feval(preds, dtrain)``.
    To work both with that convention and with the ``(labels, preds)`` order
    suggested by the parameter names, the argument exposing ``get_label()``
    is treated as the label container regardless of its position.

    Args:
        y_true: Object exposing ``get_label()`` (e.g. ``xgb.DMatrix``), or the
            predictions when called in XGBoost's ``(preds, dtrain)`` order.
        y_pred: Per-class scores, flat or ``(n_samples, n_classes)``, or the
            label container when called in XGBoost's order.

    Returns:
        Tuple ``('custom_f1', score)`` where ``score`` is the weighted F1.
    """
    # Accept either argument order (see docstring).
    if not hasattr(y_true, "get_label") and hasattr(y_pred, "get_label"):
        y_true, y_pred = y_pred, y_true

    # Extract the labels first: the class count must not be derived from the
    # label container object itself.
    labels = np.asarray(y_true.get_label()).astype(int)
    scores = np.asarray(y_pred)

    if scores.size == labels.shape[0]:
        # One value per sample cannot be a per-class score matrix for a
        # multi-class problem, so treat it as already-decoded class ids.
        predicted = np.round(scores.reshape(-1)).astype(int)
    else:
        # Infer the class count from the score array rather than from the
        # labels, which may not contain every class.
        predicted = np.argmax(scores.reshape(labels.shape[0], -1), axis=1)

    f1 = f1_score(labels, predicted, average='weighted')
    return 'custom_f1', f1

# Prepare data for XGBoost
# `df_final` must already exist when this runs; it is not defined in this section.
X = df_final.drop(['target'], axis=1)
y = df_final['target'].astype(int)

# Split into training and test sets (stratified so class ratios are preserved)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train the model with the custom loss function
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

params = {
    # NOTE(review): gradients come from `custom_loss`; this setting is presumably
    # still what makes predict() return class ids -- confirm for the installed version.
    'objective': 'multi:softmax',
    'num_class': len(np.unique(y)),  # Number of classes
    'eval_metric': 'mlogloss',  # Monitor log-loss
    'learning_rate': 0.1,
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'random_state': 42
}

# Train with the custom loss function and custom evaluation metric.
# Early stopping watches the last reported metric, which here is `custom_f1`.
# F1 is higher-is-better, and XGBoost minimises metrics whose name it does not
# recognise, so `maximize=True` is required to stop on the best F1 rather than
# the worst.
# NOTE(review): newer XGBoost releases deprecate `feval` in favour of
# `custom_metric` -- confirm against the installed version.
model = xgb.train(
    params,
    dtrain,
    num_boost_round=100,
    obj=custom_loss,
    feval=custom_f1_metric,
    evals=[(dtest, 'test')],
    early_stopping_rounds=10,
    maximize=True,
)

# Make predictions and evaluate; cast to int so the report's labels match y_test
y_pred = np.round(model.predict(dtest)).astype(int)

# Evaluation
print("Custom XGBoost Model Performance:")
print(classification_report(y_test, y_pred))