In [1]:
!pip install -U xgboost optuna optuna-integration --no-cache-dir -q

import os
import numpy as np
import pandas as pd
import xgboost as xgb
import optuna
from optuna.integration import XGBoostPruningCallback
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import gc


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.9/115.9 MB[0m [31m123.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m297.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.1/99.1 kB[0m [31m262.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
def configure_gpu():
    gpu_available = False
    try:
        X_demo = np.random.rand(1000, 50).astype(np.float32)
        y_demo = np.random.randint(0, 2, size=1000)
        dtrain = xgb.DMatrix(X_demo, label=y_demo)
        xgb.train(
            params={
                "objective": "binary:logistic",
                "device": "cuda",
                "tree_method": "hist",
            },
            dtrain=dtrain,
            num_boost_round=1,
            verbose_eval=False
        )
        gpu_available = True
        print("GPU acceleration enabled")
    except Exception as e:
        print("GPU test failed, using CPU:", e)

    return gpu_available

USE_XGB_GPU = configure_gpu()
print("GPU available:", USE_XGB_GPU)

def reduce_memory_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2

    for col in df.columns:
        col_type = df[col].dtype

        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()

            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                else:
                    df[col] = df[col].astype(np.int64)
            else:
                if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)

    if verbose:
        end_mem = df.memory_usage().sum() / 1024**2
        print(f"Memory usage reduced from {start_mem:.2f} MB to {end_mem:.2f} MB ({100 * (start_mem - end_mem) / start_mem:.1f}% reduction)")

    return df

GPU acceleration enabled
GPU available: True


In [3]:
TRAIN_PATH = "/content/drive/MyDrive/ait-511-course-project-1-obesity-risk/train.csv"
TEST_PATH  = "/content/drive/MyDrive/ait-511-course-project-1-obesity-risk/test.csv"
ID_COL = "id"
TARGET_COL = "WeightCategory"
N_SPLITS = 5
EARLY_STOP_ROUNDS = 150
N_TRIALS = 60

print("Loading and optimizing data...")
train = pd.read_csv(TRAIN_PATH)
test  = pd.read_csv(TEST_PATH)

train = reduce_memory_usage(train)
test = reduce_memory_usage(test)

X_full = train.drop(columns=[ID_COL, TARGET_COL], errors="ignore").copy()
y_full = train[TARGET_COL].copy()
test_features = test.drop(columns=[ID_COL], errors="ignore").copy()

missing_cols = set(X_full.columns) - set(test_features.columns)
for col in missing_cols:
    test_features[col] = np.nan
test_features = test_features[X_full.columns]

cat_cols = X_full.select_dtypes(include=["object"]).columns.tolist()
for col in cat_cols:
    X_full[col] = X_full[col].astype('category')
    test_features[col] = test_features[col].astype('category')

target_le = LabelEncoder()
y_enc_full = target_le.fit_transform(y_full)
num_classes = len(target_le.classes_)

print(f"Data loaded: {X_full.shape[0]} samples, {X_full.shape[1]} features, {num_classes} classes")

Loading and optimizing data...
Memory usage reduced from 2.13 MB to 1.57 MB (26.4% reduction)
Memory usage reduced from 0.68 MB to 0.49 MB (27.9% reduction)
Data loaded: 15533 samples, 16 features, 7 classes


In [4]:
def objective(trial):
    params = {
        "objective": "multi:softprob",
        "num_class": num_classes,
        "eval_metric": "mlogloss",
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 15),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "gamma": trial.suggest_float("gamma", 0.0, 0.5),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.5, 3.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 1.5),
        "max_bin": trial.suggest_int("max_bin", 64, 512),
        "grow_policy": trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"]),
        "seed": 42,
    }

    if USE_XGB_GPU:
        gpu_params = {
            "device": "cuda",
            "tree_method": "hist",
        }
        params.update(gpu_params)

        if params["grow_policy"] == "lossguide":
            params["max_leaves"] = trial.suggest_int("max_leaves", 8, 256)
    else:
        params["tree_method"] = "hist"

    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    accs = []

    for tr_idx, va_idx in skf.split(X_full, y_enc_full):
        X_tr, X_va = X_full.iloc[tr_idx], X_full.iloc[va_idx]
        y_tr, y_va = y_enc_full[tr_idx], y_enc_full[va_idx]

        dtrain = xgb.DMatrix(X_tr, label=y_tr, enable_categorical=True)
        dvalid = xgb.DMatrix(X_va, label=y_va, enable_categorical=True)

        pruning_callback = XGBoostPruningCallback(trial, "val-mlogloss")

        model = xgb.train(
            params=params,
            dtrain=dtrain,
            num_boost_round=1500,
            evals=[(dvalid, "val")],
            early_stopping_rounds=100,
            verbose_eval=False,
            callbacks=[pruning_callback]
        )

        preds = model.predict(dvalid, iteration_range=(0, model.best_iteration + 1))
        accs.append(accuracy_score(y_va, preds.argmax(axis=1)))

        del model, dtrain, dvalid
        gc.collect()

    return np.mean(accs)

print("Running optimized Optuna hyperparameter search...")
study = optuna.create_study(direction="maximize", pruner=optuna.pruners.HyperbandPruner())
study.optimize(objective, n_trials=N_TRIALS, gc_after_trial=True)

print("Starting final model training with GPU optimizations...")

[I 2025-10-26 13:42:31,037] A new study created in memory with name: no-name-1e34d0bc-db3a-4eb2-8c67-776991eeb283


Running optimized Optuna hyperparameter search...


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[I 2025-10-26 14:09:32,667] Trial 57 finished with value: 0.9057490993666487 and parameters: {'learning_rate': 0.010377123331696742, 'max_depth': 10, 'min_child_weight': 15, 'subsample': 0.8227664475557125, 'colsample_bytree': 0.6950138488744412, 'gamma': 0.41937522760360424, 'reg_lambda': 1.6236369771080208, 'reg_alpha': 0.30731266627595477, 'max_bin': 116, 'grow_policy': 'lossguide', 'max_leaves': 197}. Best is trial 44 with value: 0.906328473642431.
[I 2025-10-26 14:09:33,072] Trial 58 pruned. Trial was pruned at iteration 3.
[I 2025-10-26 14:09:33,393] Trial 59 pruned. Trial was pruned at iteration 1.


Starting final model training with GPU optimizations...


In [6]:
if study.best_trials:
    best_params = study.best_params
    print("Best parameters found:")
    for key, value in best_params.items():
        print(f"   {key}: {value}")
else:
    print("Using optimized default parameters")
    best_params = {
        'learning_rate': 0.1, 'max_depth': 8, 'min_child_weight': 5,
        'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.2,
        'reg_lambda': 1.0, 'reg_alpha': 0.1, 'max_bin': 256
    }

final_params = {
    **best_params,
    "objective": "multi:softprob",
    "num_class": num_classes,
    "eval_metric": "mlogloss",
    "seed": 42,
}

if USE_XGB_GPU:
    final_params.update({
        "device": "cuda",
        "tree_method": "hist",
    })
else:
    final_params["tree_method"] = "hist"

skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
oof_preds = np.zeros((len(X_full), num_classes), dtype=np.float32)
test_preds = np.zeros((len(test_features), num_classes), dtype=np.float32)

for fold, (tr_idx, va_idx) in enumerate(skf.split(X_full, y_enc_full), 1):
    print(f"\n Fold {fold}/{N_SPLITS} ")

    X_tr, X_va = X_full.iloc[tr_idx], X_full.iloc[va_idx]
    y_tr, y_va = y_enc_full[tr_idx], y_enc_full[va_idx]

    dtrain = xgb.DMatrix(X_tr, label=y_tr, enable_categorical=True)
    dvalid = xgb.DMatrix(X_va, label=y_va, enable_categorical=True)
    dtest  = xgb.DMatrix(test_features, enable_categorical=True)

    booster = xgb.train(
        params=final_params,
        dtrain=dtrain,
        num_boost_round=3000,
        evals=[(dvalid, "val")],
        early_stopping_rounds=EARLY_STOP_ROUNDS,
        verbose_eval=100
    )

    oof_preds[va_idx] = booster.predict(dvalid, iteration_range=(0, booster.best_iteration + 1))
    test_preds += booster.predict(dtest, iteration_range=(0, booster.best_iteration + 1)) / N_SPLITS

    acc = accuracy_score(y_va, oof_preds[va_idx].argmax(axis=1))
    print(f"Fold {fold} Accuracy: {acc:.4f}")

    del booster, dtrain, dvalid, dtest
    gc.collect()

final_acc = accuracy_score(y_enc_full, oof_preds.argmax(axis=1))
print(f"\n Final OOF Accuracy: {final_acc:.4f}")

test_pred_labels = test_preds.argmax(axis=1)
test_pred_classes = target_le.inverse_transform(test_pred_labels)

submission = pd.DataFrame({ID_COL: test[ID_COL], TARGET_COL: test_pred_classes})
submission_path = "/content/drive/MyDrive/ait-511-course-project-1-obesity-risk/submission_optuna_xgb_optuna_60.csv"
submission.to_csv(submission_path, index=False)

print("\n Saved submission to:", submission_path)
print(submission.head())

Best parameters found:
   learning_rate: 0.011524791093618694
   max_depth: 9
   min_child_weight: 14
   subsample: 0.8981593591506989
   colsample_bytree: 0.6011828986557011
   gamma: 0.45453589781508325
   reg_lambda: 0.9984089816540335
   reg_alpha: 0.16108602014622236
   max_bin: 97
   grow_policy: lossguide
   max_leaves: 203

 Fold 1/5 
[0]	val-mlogloss:1.91646
[100]	val-mlogloss:0.86108
[200]	val-mlogloss:0.53635
[300]	val-mlogloss:0.40688
[400]	val-mlogloss:0.34645
[500]	val-mlogloss:0.31615
[600]	val-mlogloss:0.29957
[700]	val-mlogloss:0.29072
[800]	val-mlogloss:0.28547
[900]	val-mlogloss:0.28245
[1000]	val-mlogloss:0.28055
[1100]	val-mlogloss:0.27940
[1200]	val-mlogloss:0.27885
[1300]	val-mlogloss:0.27848
[1400]	val-mlogloss:0.27823
[1500]	val-mlogloss:0.27808
[1600]	val-mlogloss:0.27801
[1700]	val-mlogloss:0.27799
[1800]	val-mlogloss:0.27797
[1876]	val-mlogloss:0.27805
Fold 1 Accuracy: 0.9012

 Fold 2/5 
[0]	val-mlogloss:1.91614
[100]	val-mlogloss:0.85367
[200]	val-mlogloss: