In [None]:
# ================================
# 🚀 Train XGBoost Model (Final Version with Best Known Params)
# ================================

"""
This notebook trains the XGBoost model using the best hyperparameters
found previously through GridSearchCV:
  - max_depth = 8
  - n_estimators = 200
  - learning_rate = default (None)

To reduce training time in Google Colab (CPU-only), this version skips
GridSearch and directly trains with those final parameters.

Outputs:
- Trained model (`xgb_model.joblib`)
- Classification report
"""

# 📦 Imports
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
import joblib

# ✅ Load Features and Labels
# Both arrays are read from the mounted Google Drive folder, features first.
X, y = (
    np.load(npy_path)
    for npy_path in (
        "/content/drive/MyDrive/capstone_data/x_xgb_features.npy",
        "/content/drive/MyDrive/capstone_data/y_xgb_labels.npy",
    )
)

# Print both shapes so a mismatched or truncated file is visible in the log.
print(f"[INFO] Loaded X shape: {X.shape}")
print(f"[INFO] Loaded y shape: {y.shape}")

# ✅ Train-Test Split
# 80/20 hold-out; stratifying on y and fixing the seed keeps the split
# class-balanced and repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# ✅ Directly Train with Best Params
# No hyperparameter search happens here: the values below are the ones the
# notebook header reports as the earlier GridSearchCV result.
print("[INFO] Training with best-known parameters: max_depth=8, n_estimators=200")
xgb = XGBClassifier(
    n_estimators=200,
    max_depth=8,
    eval_metric='mlogloss',
    # NOTE(review): `use_label_encoder` appears to be deprecated/ignored in
    # recent XGBoost releases -- confirm against the installed version before
    # dropping it.
    use_label_encoder=False,
    random_state=42,
)
xgb.fit(X_train, y_train)

# ✅ Evaluate
# Score the held-out split only; the report is printed, not written to disk.
y_pred = xgb.predict(X_test)
print("\n📊 Classification Report:")
report = classification_report(y_test, y_pred)
print(report)

# ✅ Save Model
# Persist the fitted estimator next to the input data on Drive.
MODEL_PATH = "/content/drive/MyDrive/capstone_data/xgb_model.joblib"
joblib.dump(value=xgb, filename=MODEL_PATH)
print(f"[✅] XGBoost model saved to: {MODEL_PATH}")