In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.decomposition import PCA

# Load both train and test
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

# Drop unwanted columns
drop_cols = ['id','composition_label_0','composition_label_1','publication_timestamp',
             'lunar_phase','creator_collective','composition_label_2','track_identifier']

df_train.drop(columns=drop_cols, inplace=True)
df_test.drop(columns=drop_cols, inplace=True)

# Separate features and target in train
target = df_train['target']
df_train.drop(columns=['target'], inplace=True)

# Identify numeric and categorical columns from train (important: use train only for this)
numeric_cols = df_train.select_dtypes(include=['float64', 'int64']).columns.tolist()
categorical_cols = df_train.select_dtypes(include=['object', 'category']).columns.tolist()

# Split numeric cols by missingness threshold on train data
threshold = int(0.10 * len(df_train))  # 10%
numeric_null_counts = df_train[numeric_cols].isna().sum()

low_null_numeric = numeric_null_counts[numeric_null_counts <= threshold].index.tolist()
high_null_numeric = numeric_null_counts[numeric_null_counts > threshold].index.tolist()

# ---------- Numeric imputation ----------
# Fit KNN imputer on train low-missing numeric, transform both train and test
knn_imputer = KNNImputer(n_neighbors=3)
train_low_num_imputed = pd.DataFrame(
    knn_imputer.fit_transform(df_train[low_null_numeric]),
    columns=low_null_numeric
)
test_low_num_imputed = pd.DataFrame(
    knn_imputer.transform(df_test[low_null_numeric]),
    columns=low_null_numeric
)

# Fit SimpleImputer on train high-missing numeric, transform both train and test
simple_imputer = SimpleImputer(strategy='mean')
train_high_num_imputed = pd.DataFrame(
    simple_imputer.fit_transform(df_train[high_null_numeric]),
    columns=high_null_numeric
)
test_high_num_imputed = pd.DataFrame(
    simple_imputer.transform(df_test[high_null_numeric]),
    columns=high_null_numeric
)

# Combine numeric imputations for train and test
train_num_imputed = pd.concat([train_low_num_imputed, train_high_num_imputed], axis=1)
test_num_imputed = pd.concat([test_low_num_imputed, test_high_num_imputed], axis=1)

# ---------- Categorical imputation ----------
cat_imputer = SimpleImputer(strategy='most_frequent')
train_cat_imputed = pd.DataFrame(
    cat_imputer.fit_transform(df_train[categorical_cols]),
    columns=categorical_cols
)
test_cat_imputed = pd.DataFrame(
    cat_imputer.transform(df_test[categorical_cols]),
    columns=categorical_cols
)

# OneHotEncoding
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
train_cat_encoded = pd.DataFrame(
    encoder.fit_transform(train_cat_imputed),
    columns=encoder.get_feature_names_out(categorical_cols)
)
test_cat_encoded = pd.DataFrame(
    encoder.transform(test_cat_imputed),
    columns=encoder.get_feature_names_out(categorical_cols)
)

# Combine numeric + categorical features for train and test
train_processed = pd.concat([train_num_imputed.reset_index(drop=True), train_cat_encoded.reset_index(drop=True)], axis=1)
test_processed = pd.concat([test_num_imputed.reset_index(drop=True), test_cat_encoded.reset_index(drop=True)], axis=1)

# ---------- Scaling ----------
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train_processed)
test_scaled = scaler.transform(test_processed)



# Final DataFrames
df_train_final = pd.DataFrame(train_scaled, columns=[f"PC{i+1}" for i in range(train_scaled.shape[1])])
df_train_final['target'] = target.reset_index(drop=True)

df_test_final = pd.DataFrame(test_scaled, columns=[f"PC{i+1}" for i in range(test_scaled.shape[1])])

# Now df_train_final and df_test_final are ready for modeling!


In [2]:
import optuna
import pandas as pd
from sklearn.ensemble import VotingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from lightgbm.callback import early_stopping

# -------------------------------
# Step 0: Prepare Final DataFrames
# -------------------------------

df_train_final = pd.DataFrame(train_scaled, columns=[f"PC{i+1}" for i in range(train_scaled.shape[1])])
df_train_final['target'] = target.reset_index(drop=True)
df_test_final = pd.DataFrame(test_scaled, columns=[f"PC{i+1}" for i in range(test_scaled.shape[1])])

# -------------------------------
# Step 1: Split data for evaluation
# -------------------------------
X = df_train_final.drop(columns=['target'])
y = df_train_final['target']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# -------------------------------
# Step 2: Optuna objective functions
# -------------------------------
def objective_xgb(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'tree_method': 'gpu_hist',
        'random_state': 42
    }
    model = XGBRegressor(**params)
    model.fit(X_train, y_train, eval_set=[(X_val, y_val)], early_stopping_rounds=10, verbose=False)
    preds = model.predict(X_val)
    return mean_squared_error(y_val, preds, squared=False)

def objective_lgb(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'device': 'gpu',
        'random_state': 42
    }
    model = LGBMRegressor(**params)
    model.fit(X_train, y_train, eval_set=[(X_val, y_val)], callbacks=[early_stopping(stopping_rounds=10)])
    preds = model.predict(X_val)
    return mean_squared_error(y_val, preds, squared=False)

def objective_cat(trial):
    params = {
        'iterations': trial.suggest_int('iterations', 100, 1000),
        'depth': trial.suggest_int('depth', 3, 12),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'bootstrap_type': 'Bernoulli',
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'task_type': 'GPU',
        'random_seed': 42,
        'verbose': 0
    }
    model = CatBoostRegressor(**params)
    model.fit(X_train, y_train, eval_set=(X_val, y_val), early_stopping_rounds=10, verbose=False)
    preds = model.predict(X_val)
    return mean_squared_error(y_val, preds, squared=False)

# -------------------------------
# Step 3: Hyperparameter tuning
# -------------------------------
print("Tuning XGBoost...")
study_xgb = optuna.create_study(direction='minimize')
study_xgb.optimize(objective_xgb, n_trials=30)
best_xgb = XGBRegressor(**study_xgb.best_params, tree_method='gpu_hist', random_state=42)
best_xgb.fit(X, y)

print("Tuning LightGBM...")
study_lgb = optuna.create_study(direction='minimize')
study_lgb.optimize(objective_lgb, n_trials=30)
best_lgb = LGBMRegressor(**study_lgb.best_params, device='gpu', random_state=42)
best_lgb.fit(X, y)

print("Tuning CatBoost...")
study_cat = optuna.create_study(direction='minimize')
study_cat.optimize(objective_cat, n_trials=30)

cat_params = study_cat.best_params.copy()
cat_params.update({
    'task_type': 'GPU',
    'random_seed': 42,
    'verbose': 0,
    'bootstrap_type': 'Bernoulli'
})

best_cat = CatBoostRegressor(**cat_params)
best_cat.fit(X, y)

# -------------------------------
# Step 4: Ensemble model
# -------------------------------
ensemble = VotingRegressor([
    ('xgb', best_xgb),
    ('lgb', best_lgb),
    ('cat', best_cat)
])
ensemble.fit(X, y)

# -------------------------------
# Step 5: Evaluate ensemble
# -------------------------------
val_preds = ensemble.predict(X_val)
val_rmse = mean_squared_error(y_val, val_preds, squared=False)
val_mape = mean_absolute_percentage_error(y_val, val_preds)
val_r2 = r2_score(y_val, val_preds)

print("\n📊 Ensemble Validation Metrics:")
print(f" - RMSE: {val_rmse:.4f}")
print(f" - MAPE: {val_mape:.4f}")
print(f" - R²:   {val_r2:.4f}")

# -------------------------------
# Step 6: Predict on test set
# -------------------------------
test_preds = ensemble.predict(df_test_final)

# -------------------------------
# Step 7: Save predictions
# -------------------------------
submission = pd.read_csv("sample_submission.csv")
submission['target'] = test_preds
submission.to_csv("ensemble_submission.csv", index=False)


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-05-30 19:04:58,401] A new study created in memory with name: no-name-3de2efcd-1bd7-4003-8280-bb7d7c22a13b


Tuning XGBoost...


[I 2025-05-30 19:05:02,660] Trial 0 finished with value: 11.221333444423117 and parameters: {'n_estimators': 227, 'max_depth': 8, 'learning_rate': 0.27964051937949663, 'subsample': 0.8725241601621998, 'colsample_bytree': 0.8595980244611049}. Best is trial 0 with value: 11.221333444423117.
[I 2025-05-30 19:05:21,835] Trial 1 finished with value: 10.18316508465198 and parameters: {'n_estimators': 464, 'max_depth': 11, 'learning_rate': 0.06675376761818957, 'subsample': 0.6114823401124703, 'colsample_bytree': 0.5894733762427035}. Best is trial 1 with value: 10.18316508465198.
[I 2025-05-30 19:05:30,506] Trial 2 finished with value: 10.901369146190945 and parameters: {'n_estimators': 787, 'max_depth': 9, 'learning_rate': 0.2605084501299366, 'subsample': 0.9570445854896719, 'colsample_bytree': 0.6146829948921183}. Best is trial 1 with value: 10.18316508465198.
[I 2025-05-30 19:05:38,278] Trial 3 finished with value: 10.886916553180408 and parameters: {'n_estimators': 854, 'max_depth': 7, 'le

Tuning LightGBM...
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.006445 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[969]	valid_0's l2: 130.471


[I 2025-05-30 19:15:31,550] Trial 0 finished with value: 11.422394565769387 and parameters: {'n_estimators': 969, 'max_depth': 11, 'learning_rate': 0.06989358740583426, 'subsample': 0.9361094720100294, 'colsample_bytree': 0.623068174644005}. Best is trial 0 with value: 11.422394565769387.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.014764 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[458]	valid_0's l2: 154.356


[I 2025-05-30 19:15:37,575] Trial 1 finished with value: 12.42401056947972 and parameters: {'n_estimators': 458, 'max_depth': 8, 'learning_rate': 0.0755742940322633, 'subsample': 0.8308794895748874, 'colsample_bytree': 0.57070133540103}. Best is trial 0 with value: 11.422394565769387.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.010323 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[787]	valid_0's l2: 130.486


[I 2025-05-30 19:15:46,959] Trial 2 finished with value: 11.423052452296732 and parameters: {'n_estimators': 788, 'max_depth': 8, 'learning_rate': 0.2948622217259784, 'subsample': 0.7453671242290134, 'colsample_bytree': 0.967714997829915}. Best is trial 0 with value: 11.422394565769387.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.008189 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds


[I 2025-05-30 19:15:48,718] Trial 3 finished with value: 17.149208141452387 and parameters: {'n_estimators': 108, 'max_depth': 6, 'learning_rate': 0.02118441206489385, 'subsample': 0.9076493830281385, 'colsample_bytree': 0.5125636362788455}. Best is trial 0 with value: 11.422394565769387.


Did not meet early stopping. Best iteration is:
[108]	valid_0's l2: 294.095
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.009422 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds


[I 2025-05-30 19:15:50,871] Trial 4 finished with value: 12.417793292145047 and parameters: {'n_estimators': 156, 'max_depth': 7, 'learning_rate': 0.28983021923153135, 'subsample': 0.6112958014425143, 'colsample_bytree': 0.9312787967440198}. Best is trial 0 with value: 11.422394565769387.


Did not meet early stopping. Best iteration is:
[156]	valid_0's l2: 154.202
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.008233 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[379]	valid_0's l2: 193.423


[I 2025-05-30 19:15:53,535] Trial 5 finished with value: 13.907647731417725 and parameters: {'n_estimators': 379, 'max_depth': 4, 'learning_rate': 0.09588932066120409, 'subsample': 0.6349089135546309, 'colsample_bytree': 0.8483920651536827}. Best is trial 0 with value: 11.422394565769387.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.008306 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[921]	valid_0's l2: 136.097


[I 2025-05-30 19:15:59,233] Trial 6 finished with value: 11.666069975427767 and parameters: {'n_estimators': 921, 'max_depth': 4, 'learning_rate': 0.27779697205141574, 'subsample': 0.7534103395206259, 'colsample_bytree': 0.9776513447494735}. Best is trial 0 with value: 11.422394565769387.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.011624 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[614]	valid_0's l2: 142.636


[I 2025-05-30 19:16:06,339] Trial 7 finished with value: 11.943045337323387 and parameters: {'n_estimators': 614, 'max_depth': 6, 'learning_rate': 0.08130040705381494, 'subsample': 0.6624189827719174, 'colsample_bytree': 0.8226050313247779}. Best is trial 0 with value: 11.422394565769387.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.008085 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[515]	valid_0's l2: 135.289


[I 2025-05-30 19:16:11,508] Trial 8 finished with value: 11.631369649095687 and parameters: {'n_estimators': 516, 'max_depth': 5, 'learning_rate': 0.18957479248188344, 'subsample': 0.694446541585162, 'colsample_bytree': 0.814363583189738}. Best is trial 0 with value: 11.422394565769387.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.010996 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[486]	valid_0's l2: 134.782


[I 2025-05-30 19:16:16,397] Trial 9 finished with value: 11.609584593796297 and parameters: {'n_estimators': 487, 'max_depth': 5, 'learning_rate': 0.26080635675639097, 'subsample': 0.9870955657182705, 'colsample_bytree': 0.6492049506117423}. Best is trial 0 with value: 11.422394565769387.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.007652 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[951]	valid_0's l2: 120.309


[I 2025-05-30 19:16:28,815] Trial 10 finished with value: 10.968547661173222 and parameters: {'n_estimators': 951, 'max_depth': 12, 'learning_rate': 0.17085421465703668, 'subsample': 0.9958689051291488, 'colsample_bytree': 0.6994848121446806}. Best is trial 10 with value: 10.968547661173222.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.007572 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[996]	valid_0's l2: 121.489


[I 2025-05-30 19:16:41,634] Trial 11 finished with value: 11.022203897647211 and parameters: {'n_estimators': 996, 'max_depth': 12, 'learning_rate': 0.17746669384485408, 'subsample': 0.9996627237151375, 'colsample_bytree': 0.6822109462163599}. Best is trial 10 with value: 10.968547661173222.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.008168 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[789]	valid_0's l2: 124.781


[I 2025-05-30 19:16:51,644] Trial 12 finished with value: 11.17054237124463 and parameters: {'n_estimators': 789, 'max_depth': 12, 'learning_rate': 0.17045309003435125, 'subsample': 0.5251417258734976, 'colsample_bytree': 0.7079568709023133}. Best is trial 10 with value: 10.968547661173222.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.009046 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[743]	valid_0's l2: 121.996


[I 2025-05-30 19:17:01,072] Trial 13 finished with value: 11.045194676490784 and parameters: {'n_estimators': 826, 'max_depth': 10, 'learning_rate': 0.2330729420036281, 'subsample': 0.8646993661019378, 'colsample_bytree': 0.7374372492393921}. Best is trial 10 with value: 10.968547661173222.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.007762 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[678]	valid_0's l2: 128.004


[I 2025-05-30 19:17:09,806] Trial 14 finished with value: 11.313883891802531 and parameters: {'n_estimators': 678, 'max_depth': 10, 'learning_rate': 0.13210538406624134, 'subsample': 0.9991405123684788, 'colsample_bytree': 0.6714307966727765}. Best is trial 10 with value: 10.968547661173222.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.015575 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[870]	valid_0's l2: 120.407


[I 2025-05-30 19:17:21,211] Trial 15 finished with value: 10.972992618167806 and parameters: {'n_estimators': 991, 'max_depth': 12, 'learning_rate': 0.20319614642138642, 'subsample': 0.8476144803903238, 'colsample_bytree': 0.7669114453803774}. Best is trial 10 with value: 10.968547661173222.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.008142 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[872]	valid_0's l2: 124.09


[I 2025-05-30 19:17:31,999] Trial 16 finished with value: 11.139560321652382 and parameters: {'n_estimators': 872, 'max_depth': 10, 'learning_rate': 0.21973991704460766, 'subsample': 0.8212832386967017, 'colsample_bytree': 0.7685212400285327}. Best is trial 10 with value: 10.968547661173222.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.006978 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[690]	valid_0's l2: 128.221


[I 2025-05-30 19:17:40,678] Trial 17 finished with value: 11.323472235837876 and parameters: {'n_estimators': 690, 'max_depth': 9, 'learning_rate': 0.12854513043342564, 'subsample': 0.909520163007077, 'colsample_bytree': 0.8732816040022617}. Best is trial 10 with value: 10.968547661173222.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.007853 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds


[I 2025-05-30 19:17:44,027] Trial 18 finished with value: 12.10646518341475 and parameters: {'n_estimators': 236, 'max_depth': 12, 'learning_rate': 0.22378081764319252, 'subsample': 0.7636055397701911, 'colsample_bytree': 0.7597096777579116}. Best is trial 10 with value: 10.968547661173222.


Did not meet early stopping. Best iteration is:
[236]	valid_0's l2: 146.566
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.008254 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[716]	valid_0's l2: 125.316


[I 2025-05-30 19:17:53,206] Trial 19 finished with value: 11.194453717085707 and parameters: {'n_estimators': 717, 'max_depth': 11, 'learning_rate': 0.20100318965075722, 'subsample': 0.9461953574086679, 'colsample_bytree': 0.604385978586888}. Best is trial 10 with value: 10.968547661173222.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.007703 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[903]	valid_0's l2: 121.379


[I 2025-05-30 19:18:04,930] Trial 20 finished with value: 11.017192519283928 and parameters: {'n_estimators': 903, 'max_depth': 11, 'learning_rate': 0.1259529243346758, 'subsample': 0.8594082490127744, 'colsample_bytree': 0.9050733683978893}. Best is trial 10 with value: 10.968547661173222.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.008283 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[915]	valid_0's l2: 120.699


[I 2025-05-30 19:18:16,938] Trial 21 finished with value: 10.986302568446908 and parameters: {'n_estimators': 915, 'max_depth': 11, 'learning_rate': 0.14771296377308324, 'subsample': 0.8577895943237734, 'colsample_bytree': 0.886205220962324}. Best is trial 10 with value: 10.968547661173222.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.008342 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[971]	valid_0's l2: 119.875


[I 2025-05-30 19:18:29,829] Trial 22 finished with value: 10.948740034939101 and parameters: {'n_estimators': 971, 'max_depth': 12, 'learning_rate': 0.15429076817837464, 'subsample': 0.7910783779037036, 'colsample_bytree': 0.8010080629660268}. Best is trial 22 with value: 10.948740034939101.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.007932 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[963]	valid_0's l2: 119.882


[I 2025-05-30 19:18:42,486] Trial 23 finished with value: 10.949074895754817 and parameters: {'n_estimators': 964, 'max_depth': 12, 'learning_rate': 0.16250827985763083, 'subsample': 0.7819837484688654, 'colsample_bytree': 0.7857456529921361}. Best is trial 22 with value: 10.948740034939101.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.006208 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[802]	valid_0's l2: 123.745


[I 2025-05-30 19:18:52,222] Trial 24 finished with value: 11.124081222997463 and parameters: {'n_estimators': 802, 'max_depth': 9, 'learning_rate': 0.16225557683818176, 'subsample': 0.7999962732192295, 'colsample_bytree': 0.8150090459876049}. Best is trial 22 with value: 10.948740034939101.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.006615 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[851]	valid_0's l2: 122.511


[I 2025-05-30 19:19:04,239] Trial 25 finished with value: 11.068486328796372 and parameters: {'n_estimators': 851, 'max_depth': 12, 'learning_rate': 0.14911798435715717, 'subsample': 0.568348883229566, 'colsample_bytree': 0.7214392769607839}. Best is trial 22 with value: 10.948740034939101.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.006904 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[586]	valid_0's l2: 135.997


[I 2025-05-30 19:19:11,659] Trial 26 finished with value: 11.66177492791176 and parameters: {'n_estimators': 586, 'max_depth': 9, 'learning_rate': 0.10139257475034194, 'subsample': 0.7094944410358404, 'colsample_bytree': 0.7877216433097992}. Best is trial 22 with value: 10.948740034939101.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.006239 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[943]	valid_0's l2: 142.024


[I 2025-05-30 19:19:24,448] Trial 27 finished with value: 11.917389548235933 and parameters: {'n_estimators': 943, 'max_depth': 11, 'learning_rate': 0.047859800910253936, 'subsample': 0.7770729757703575, 'colsample_bytree': 0.6891291908111379}. Best is trial 22 with value: 10.948740034939101.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.007376 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[723]	valid_0's l2: 128.909


[I 2025-05-30 19:19:33,761] Trial 28 finished with value: 11.353812752603993 and parameters: {'n_estimators': 723, 'max_depth': 10, 'learning_rate': 0.11684556290671741, 'subsample': 0.7134454747500655, 'colsample_bytree': 0.8430939387180405}. Best is trial 22 with value: 10.948740034939101.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10149
[LightGBM] [Info] Number of data points in the train set: 49287, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.26 MB) transferred to GPU in 0.009338 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.070120
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[864]	valid_0's l2: 125.107


[I 2025-05-30 19:19:45,100] Trial 29 finished with value: 11.18512243230003 and parameters: {'n_estimators': 990, 'max_depth': 11, 'learning_rate': 0.24859398991813983, 'subsample': 0.9502461507432908, 'colsample_bytree': 0.6412272309805894}. Best is trial 22 with value: 10.948740034939101.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10163
[LightGBM] [Info] Number of data points in the train set: 61609, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.82 MB) transferred to GPU in 0.010041 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.067328


[I 2025-05-30 19:19:57,278] A new study created in memory with name: no-name-e449afd2-bab0-485d-8e6a-ed4fe55a4b8e


Tuning CatBoost...


[I 2025-05-30 19:20:31,332] Trial 0 finished with value: 13.845742839466096 and parameters: {'iterations': 646, 'depth': 4, 'learning_rate': 0.15633468881973317, 'subsample': 0.5658000444935128}. Best is trial 0 with value: 13.845742839466096.
[I 2025-05-30 19:20:41,399] Trial 1 finished with value: 12.073876884515377 and parameters: {'iterations': 478, 'depth': 9, 'learning_rate': 0.056472779327085935, 'subsample': 0.9338323905447357}. Best is trial 1 with value: 12.073876884515377.
[I 2025-05-30 19:20:54,130] Trial 2 finished with value: 13.526423357204566 and parameters: {'iterations': 531, 'depth': 6, 'learning_rate': 0.15535844279393085, 'subsample': 0.9063230435008536}. Best is trial 1 with value: 12.073876884515377.
[I 2025-05-30 19:21:07,519] Trial 3 finished with value: 14.17697077176677 and parameters: {'iterations': 628, 'depth': 5, 'learning_rate': 0.11476538291895998, 'subsample': 0.7605206122587056}. Best is trial 1 with value: 12.073876884515377.
[I 2025-05-30 19:21:10,0

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 10163
[LightGBM] [Info] Number of data points in the train set: 61609, number of used features: 62
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 48 dense feature groups (2.82 MB) transferred to GPU in 0.006747 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 52.067328





📊 Ensemble Validation Metrics:
 - RMSE: 2.6421
 - MAPE: 0.1254
 - R²:   0.9850
