In [None]:
# Dependencies
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor

In [None]:
# TODO: tuning!
# ==========================================
# 1. XGBoost Regressor (Uses X_train_xgb)
# ==========================================
print("Training XGBoost...")
xgb_model = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=1000,          # upper bound; early stopping may end training sooner
    learning_rate=0.02,
    max_depth=7,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    n_jobs=-1,
    tree_method='hist',         # Faster
    # Early stopping is configured on the estimator: passing it to fit() was
    # deprecated in XGBoost 1.6 and removed in 2.0.
    early_stopping_rounds=50,
)

# NOTE(review): early stopping monitors (X_test_xgb, y_test), the same split this
# notebook later predicts on and scores, so the reported metrics are optimistic.
# Consider a separate validation split for eval_set.
xgb_model.fit(
    X_train_xgb, y_train,
    eval_set=[(X_test_xgb, y_test)],
    verbose=100                 # log the eval metric every 100 boosting rounds
)

# ==========================================
# 2. LightGBM Regressor (Uses X_train_lgb)
# ==========================================
print("\nTraining LightGBM...")
lgb_model = lgb.LGBMRegressor(
    objective='regression',
    metric='rmse',
    n_estimators=2000,      # upper bound; early stopping may end training sooner
    learning_rate=0.02,
    max_depth=-1,           # no depth limit; tree size is bounded by num_leaves
    num_leaves=31,
    subsample=0.8,
    subsample_freq=1,       # row subsampling is ignored unless this is > 0
    colsample_bytree=0.8,
    random_state=42,
    n_jobs=-1
)

# Early stopping and logging are passed as callbacks: the fit() keyword arguments
# `early_stopping_rounds` and `verbose` were removed in LightGBM 4.0.
# NOTE(review): early stopping monitors (X_test_lgb, y_test), the same split this
# notebook later predicts on and scores, so the reported metrics are optimistic.
# Consider a separate validation split for eval_set.
lgb_model.fit(
    X_train_lgb, y_train,
    eval_set=[(X_test_lgb, y_test)],
    callbacks=[
        lgb.early_stopping(stopping_rounds=50),
        lgb.log_evaluation(period=100),
    ],
)

# ==========================================
# 3. CatBoost Regressor (Uses X_train_cat)
# ==========================================
print("\nTraining CatBoost...")

# Columns to hand to CatBoost as categorical features. Names that are absent
# from X_train_cat are skipped rather than raising.
categorical_candidates = [
    'Type', 'Breed1', 'Breed2', 'Gender',
    'Color1', 'Color2', 'Color3',
    'MaturitySize', 'FurLength',
    'Vaccinated', 'Dewormed', 'Sterilized',
    'Health', 'State', 'RescuerID',
]
cat_features_indices = [
    X_train_cat.columns.get_loc(column_name)
    for column_name in categorical_candidates
    if column_name in X_train_cat.columns
]

# 'Description' (when present) is passed as a text feature; otherwise none are.
if 'Description' in X_train_cat.columns:
    text_features_indices = [X_train_cat.columns.get_loc('Description')]
else:
    text_features_indices = []

catboost_params = dict(
    loss_function='RMSE',
    n_estimators=2000,
    learning_rate=0.03,
    depth=6,
    random_seed=42,
    verbose=200,
    task_type='CPU',  # Change to GPU if available
)
cat_model = CatBoostRegressor(**catboost_params)

# Fit with the test split as the evaluation set; training stops after 50
# rounds without improvement on it.
cat_model.fit(
    X_train_cat,
    y_train,
    cat_features=cat_features_indices,
    text_features=text_features_indices,
    eval_set=(X_test_cat, y_test),
    early_stopping_rounds=50,
)

print("\nAll models trained successfully!")

In [None]:
# Continuous predictions from each fitted model, each scored on its own
# version of the test features (same order as before: XGBoost, LightGBM, CatBoost).
pred_xgb, pred_lgb, pred_cat = (
    fitted_model.predict(test_features)
    for fitted_model, test_features in (
        (xgb_model, X_test_xgb),
        (lgb_model, X_test_lgb),
        (cat_model, X_test_cat),
    )
)

# Equal-weight average of the three models
final_blend_continuous = (pred_xgb + pred_lgb + pred_cat) / 3

# Next step: apply your OptimizedRounder to 'final_blend_continuous'

In [None]:
def evaluate_model(model, model_prediction, y_true=None):
    """Print the quadratic-weighted kappa and accuracy of a set of predictions.

    Args:
        model: Object whose class name is printed as the report label; it is
            not used for anything else.
        model_prediction: Predicted labels to score against ``y_true``.
        y_true: Ground-truth labels. When omitted, the notebook-level
            ``y_test`` is used, which keeps existing two-argument calls working.

    Returns:
        None. The scores are printed, not returned.
    """
    if y_true is None:
        # Fall back to the global test labels for backward compatibility.
        y_true = y_test

    print(f"Model: {model.__class__.__name__}")
    # Arguments follow the (y_true, y_pred) convention used by the metric functions.
    print(f"Kappa Score: {cohen_kappa_score(y_true, model_prediction, weights='quadratic'):.4f}")
    print(f"Accuracy Score: {accuracy_score(y_true, model_prediction):.4f}")