In [1]:
#!/usr/bin/env python
# coding: utf-8
#
# XGBoost Champion Model Bake-Off with Optuna, GPU, and Full Metrics
#
from __future__ import annotations
import warnings
from pathlib import Path
from typing import Dict, Any

import numpy as np
import pandas as pd
import xgboost as xgb
import optuna
# 🔥 Ensuring all necessary metric functions are imported
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Keep the console output readable: silence every UserWarning raised from here
# on, and only let Optuna log at WARNING level or above (hides per-trial info).
warnings.simplefilter("ignore", category=UserWarning)
optuna.logging.set_verbosity(optuna.logging.WARNING)

class XGBoost_Champion_Finder:
    """
    A dedicated class to find the best XGBoost model, accelerated by GPU.
    """
    def __init__(self, config: Dict[str, Any]):
        self.config = config
        self.df = self._load_data(config['csv_path'])
        self.feat_cols = [c for c in self.df.columns if c not in config['meta_cols']]
        self.X_all, self.y_all = self._make_windows()
        
        self.X_tune, self.y_tune, \
        self.X_val, self.y_val, \
        self.X_test, self.y_test = self._split_data()

    def _load_data(self, path: str | Path) -> pd.DataFrame:
        print("─" * 60 + "\n1. Loading and cleaning data...")
        df = pd.read_csv(path).loc[:, ~pd.read_csv(path).columns.duplicated()]
        req = set(self.config['meta_cols'])
        if missing := req - set(df.columns): raise KeyError(f"Missing cols: {missing}")
        df[self.config['quarter_col']] = pd.to_datetime(df[self.config['quarter_col']])
        df.sort_values([self.config['id_col'], self.config['quarter_col']], inplace=True)
        df = df.dropna()
        num_cols = df.select_dtypes(include=[np.number]).columns
        return df[list(req | set(num_cols))]

    def _make_windows(self) -> (np.ndarray, np.ndarray):
        print("2. Preparing sequence data...")
        X, y = [], []
        cfg = self.config
        for _, g in self.df.groupby(cfg['id_col']):
            g = g.sort_values(cfg['quarter_col'])
            arr, lbl = g[self.feat_cols].to_numpy(), g[cfg['target_col']].to_numpy()
            for i in range(cfg['lags'], len(g)):
                X.append(arr[i - cfg['lags']:i].ravel())
                y.append(lbl[i])
        return np.asarray(X), np.asarray(y)

    def _split_data(self):
        """Splits data chronologically into Tune, Validation, and Test sets."""
        print("3. Splitting data into Tune (60%), Validation (20%), and Test (20%) sets...")
        n = len(self.y_all)
        tune_end = int(n * 0.6)
        val_end = int(n * 0.8)
        
        X_tune, y_tune = self.X_all[:tune_end], self.y_all[:tune_end]
        X_val, y_val = self.X_all[tune_end:val_end], self.y_all[tune_end:val_end]
        X_test, y_test = self.X_all[val_end:], self.y_all[val_end:]
        
        print(f"   Tune set size: {len(y_tune)}")
        print(f"   Validation set size: {len(y_val)}")
        print(f"   Test set size: {len(y_test)}")
        return X_tune, y_tune, X_val, y_val, X_test, y_test

    def _objective(self, trial: optuna.Trial) -> float:
        """The objective function for Optuna to maximize, specifically for XGBoost."""
        params = {
            'objective': 'binary:logistic', 'eval_metric': 'logloss', 'verbosity': 0,
            'tree_method': 'gpu_hist',
            'random_state': self.config['seed'],
            'n_estimators': trial.suggest_int('n_estimators', 200, 1000, step=100),
            'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'subsample': trial.suggest_float('subsample', 0.6, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
            'gamma': trial.suggest_float('gamma', 1e-8, 1.0, log=True),
            'scale_pos_weight': trial.suggest_int('scale_pos_weight', 20, 50),
        }

        model = xgb.XGBClassifier(**params).fit(self.X_tune, self.y_tune)
        y_probs = model.predict_proba(self.X_val)[:, 1]
        
        best_f1 = 0
        for threshold in np.arange(0.1, 0.9, 0.05):
            preds = (y_probs > threshold).astype(int)
            best_f1 = max(best_f1, f1_score(self.y_val, preds))
            
        return best_f1
    
    def _evaluate_on_test_set(self, params: Dict[str, Any], model_name: str):
        """Evaluates an XGBoost model on the final test set with full metrics."""
        print(f"\n--- Evaluating '{model_name}' on the Final Test Set ---")
        
        win_size = self.config['sliding_win_size']
        retrain_interval = self.config['retrain_interval']
        X_history = np.vstack([self.X_tune, self.X_val])
        y_history = np.concatenate([self.y_tune, self.y_val])
        
        all_probs, all_trues = [], []
        model = None

        for i in range(len(self.X_test)):
            if model is None or i % retrain_interval == 0:
                print(f"  Retraining at test step {i}...")
                X_train_current = np.vstack([X_history, self.X_test[:i]])
                y_train_current = np.concatenate([y_history, self.y_test[:i]])
                X_train_window, y_train_window = X_train_current[-win_size:], y_train_current[-win_size:]
                
                model = xgb.XGBClassifier(**params).fit(X_train_window, y_train_window)

            X_test_point = self.X_test[i].reshape(1, -1)
            y_prob = model.predict_proba(X_test_point)[:, 1][0]
            all_probs.append(y_prob)
            all_trues.append(self.y_test[i])
            
        print("\n  Tuning classification threshold and calculating all metrics...")
        
        final_auc = roc_auc_score(all_trues, all_probs)
        best_f1, best_thresh, best_prec, best_rec = 0, 0, 0, 0
        
        for threshold in np.arange(0.1, 0.9, 0.01):
            preds = (np.array(all_probs) > threshold).astype(int)
            current_f1 = f1_score(all_trues, preds, zero_division=0)
            if current_f1 > best_f1:
                best_f1, best_thresh = current_f1, threshold
                best_prec = precision_score(all_trues, preds, zero_division=0)
                best_rec = recall_score(all_trues, preds, zero_division=0)
        
        final_gmean = np.sqrt(best_prec * best_rec) if best_prec > 0 and best_rec > 0 else 0

        print(f"\n[{model_name}] Final Test Set Performance:")
        print(f"  Best Threshold = {best_thresh:.2f}")
        print(f"  F1-Score       = {best_f1:.4f}")
        print(f"  AUC            = {final_auc:.4f}")
        print(f"  G-Mean         = {final_gmean:.4f}")
        print(f"  Precision      = {best_prec:.4f}")
        print(f"  Recall         = {best_rec:.4f}")

    def run(self):
        """Orchestrates the entire XGBoost bake-off process."""
        print("Starting XGBoost Championship Bake-Off...")
        
        # --- Model 1: Expert-Tuned Baseline ---
        expert_params = self.config['xgboost_expert_params']
        self._evaluate_on_test_set(expert_params, "Expert-Tuned XGBM")

        # --- Model 2: Optuna-Optimized Champion ---
        print("\n" + "═" * 60)
        print("Finding and Evaluating Optuna-Tuned XGBM")
        print("═" * 60)
        print("4. Starting Optuna optimization process...")
        study = optuna.create_study(direction='maximize')
        study.optimize(self._objective, n_trials=self.config['optuna_trials'], show_progress_bar=True)
        
        print(f"\nOptuna process finished!")
        print(f"🏆 Best F1-score on Validation Set: {study.best_value:.4f}")
        print(f"🏆 Best Hyperparameters Found: {study.best_params}")
        
        optuna_params = {**self.config['xgboost_expert_params'], **study.best_params}
        self._evaluate_on_test_set(optuna_params, "Optuna-Tuned XGBM")
        print("\nBake-Off Complete!")


if __name__ == "__main__":
    # Single source of settings for the whole experiment.
    CONFIG = {
        # --- Data ---
        "csv_path": r'cvm_indicators_dataset_2011-2021.csv',
        "id_col": "ID",
        "quarter_col": "QUARTER",
        "target_col": "LABEL",
        "meta_cols": ["ID", "QUARTER", "LABEL"],
        "lags": 4,
        "seed": 42,

        # --- Walk-forward evaluation on the test set ---
        "sliding_win_size": 200,
        "retrain_interval": 500,

        # --- Hyperparameter search budget ---
        "optuna_trials": 50,

        # --- Hand-picked XGBoost baseline (GPU training and prediction) ---
        "xgboost_expert_params": {
            "objective": 'binary:logistic',
            "eval_metric": 'logloss',
            "verbosity": 0,
            "tree_method": 'gpu_hist',
            'predictor': 'gpu_predictor',
            "random_state": 42,
            "n_estimators": 500,
            "learning_rate": 0.05,
            "scale_pos_weight": 35,
        },
    }

    # Build the pipeline (loads and splits the data), then run both models.
    champion_finder = XGBoost_Champion_Finder(config=CONFIG)
    champion_finder.run()

────────────────────────────────────────────────────────────
1. Loading and cleaning data...
2. Preparing sequence data...
3. Splitting data into Tune (60%), Validation (20%), and Test (20%) sets...
   Tune set size: 12256
   Validation set size: 4086
   Test set size: 4086
Starting XGBoost Championship Bake-Off...

--- Evaluating 'Expert-Tuned XGBM' on the Final Test Set ---
  Retraining at test step 0...
  Retraining at test step 500...
  Retraining at test step 1000...
  Retraining at test step 1500...
  Retraining at test step 2000...
  Retraining at test step 2500...
  Retraining at test step 3000...
  Retraining at test step 3500...
  Retraining at test step 4000...

  Tuning classification threshold and calculating all metrics...

[Expert-Tuned XGBM] Final Test Set Performance:
  Best Threshold = 0.46
  F1-Score       = 0.1843
  AUC            = 0.6236
  G-Mean         = 0.2151
  Precision      = 0.1216
  Recall         = 0.3806

═════════════════════════════════════════════════

  0%|          | 0/50 [00:00<?, ?it/s]


Optuna process finished!
🏆 Best F1-score on Validation Set: 0.5657
🏆 Best Hyperparameters Found: {'n_estimators': 400, 'learning_rate': 0.018726925891811734, 'max_depth': 3, 'subsample': 0.7664833578190968, 'colsample_bytree': 0.8879894104083014, 'gamma': 1.3314960759500672e-06, 'scale_pos_weight': 33}

--- Evaluating 'Optuna-Tuned XGBM' on the Final Test Set ---
  Retraining at test step 0...
  Retraining at test step 500...
  Retraining at test step 1000...
  Retraining at test step 1500...
  Retraining at test step 2000...
  Retraining at test step 2500...
  Retraining at test step 3000...
  Retraining at test step 3500...
  Retraining at test step 4000...

  Tuning classification threshold and calculating all metrics...

[Optuna-Tuned XGBM] Final Test Set Performance:
  Best Threshold = 0.79
  F1-Score       = 0.2149
  AUC            = 0.6782
  G-Mean         = 0.2186
  Precision      = 0.1816
  Recall         = 0.2632

Bake-Off Complete!
