In [2]:
# ==============================================================================
# VERSION: Robust & Leak-Free Model
# This version implements a leak-proof cross-validation scheme for reliable
# hyperparameter tuning and trains a powerful final model.
# ==============================================================================
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import optuna
import warnings

warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

# --- Define File Paths ---
TRAIN_FILE_PATH = '/Users/shrinarayan/Desktop/Prediction-PurchaseValues/dataset/train_data.csv'
TEST_FILE_PATH = '/Users/shrinarayan/Desktop/Prediction-PurchaseValues/dataset/test_data.csv'

# ==============================================================================
# PART 1: FEATURE ENGINEERING & PREPROCESSING CLASSES (Largely Unchanged)
# ==============================================================================

class AdvancedFeatureEngineering(BaseEstimator, TransformerMixin):
    """Creates time-based and interaction features from session-level data."""
    def fit(self, X, y=None): return self
    def transform(self, X):
        X_copy = X.copy()
        X_copy['date'] = pd.to_datetime(X_copy['date'], format='%Y%m%d')
        X_copy['sessionYear'] = X_copy['date'].dt.year
        X_copy['sessionMonth'] = X_copy['date'].dt.month
        X_copy['sessionDayOfWeek'] = X_copy['date'].dt.dayofweek
        X_copy['sessionHour'] = pd.to_datetime(X_copy['sessionStart'], unit='s').dt.hour
        X_copy['is_weekend'] = (X_copy['sessionDayOfWeek'] >= 5).astype(int)
        X_copy['month_day_interaction'] = X_copy['sessionMonth'].astype(str) + '_' + X_copy['sessionDayOfWeek'].astype(str)
        X_copy['browser_os_interaction'] = X_copy['browser'].astype(str) + '_' + X_copy['os'].astype(str)
        X_copy['geo_channel_interaction'] = X_copy['geoNetwork.continent'].astype(str) + '_' + X_copy['userChannel'].astype(str)
        X_copy['device_channel_interaction'] = X_copy['deviceType'].astype(str) + '_' + X_copy['userChannel'].astype(str)
        X_copy['hits_per_pageview'] = X_copy['totalHits'] / (X_copy['pageViews'].replace(0, 1)) # Avoid division by zero
        X_copy['ad_page_binned'] = X_copy['trafficSource.adwordsClickInfo.page'].apply(lambda p: 1 if p == 1.0 else (2 if pd.notna(p) else 0))
        cols_to_drop = ['date', 'sessionStart', 'sessionId', 'trafficSource.adwordsClickInfo.page']
        X_copy = X_copy.drop(columns=cols_to_drop, errors='ignore')
        return X_copy

class TargetEncoder(BaseEstimator, TransformerMixin):
    """Target encodes categorical features with smoothing."""
    def __init__(self, columns=None, smoothing=10):
        self.columns, self.smoothing = columns, smoothing
        self.mappings_, self.global_mean_ = {}, 0
    def fit(self, X, y):
        X_fit, y_fit = X.copy(), y.copy()
        self.global_mean_ = np.mean(y_fit)
        for col in self.columns:
            X_fit[col] = X_fit[col].fillna('missing').astype(str)
            agg = y_fit.groupby(X_fit[col]).agg(['mean', 'count'])
            smooth_mean = (agg['count'] * agg['mean'] + self.smoothing * self.global_mean_) / (agg['count'] + self.smoothing)
            self.mappings_[col] = smooth_mean.to_dict()
        return self
    def transform(self, X):
        X_transform = X.copy()
        for col in self.columns:
            X_transform[col] = X_transform[col].fillna('missing').astype(str)
            X_transform[col] = X_transform[col].map(self.mappings_[col]).fillna(self.global_mean_)
        return X_transform

def create_user_aggregates(df):
    """Creates user-level aggregates. Designed to be used on a training fold."""
    agg = df.groupby('userId').agg(
        user_session_count=('sessionId', 'nunique'),
        user_total_hits=('totalHits', 'sum'),
        user_avg_hits=('totalHits', 'mean'),
        user_total_pageviews=('pageViews', 'sum'),
        user_avg_pageviews=('pageViews', 'mean'),
        user_purchase_count=('made_purchase', 'sum'),
        user_total_purchase_value=('purchaseValue', 'sum'),
    ).reset_index()
    agg['user_conversion_rate'] = agg['user_purchase_count'] / agg['user_session_count']
    agg['user_avg_purchase_value'] = agg['user_total_purchase_value'] / (agg['user_purchase_count'] + 1e-6)
    return agg

# ==============================================================================
# PART 2: DATA LOADING AND INITIAL PREPARATION
# ==============================================================================

print("Loading and preparing data...")
df_train = pd.read_csv(TRAIN_FILE_PATH, dtype={'fullVisitorId': 'str', 'sessionId': 'str'})
df_test = pd.read_csv(TEST_FILE_PATH, dtype={'fullVisitorId': 'str', 'sessionId': 'str'})

# Unify userId column name
df_train.rename(columns={'fullVisitorId': 'userId'}, inplace=True)
df_test.rename(columns={'fullVisitorId': 'userId'}, inplace=True)

# Drop constant columns
one_value_cols = [col for col in df_train.columns if df_train[col].nunique(dropna=False) == 1]
df_train = df_train.drop(columns=one_value_cols)
df_test = df_test.drop(columns=[c for c in one_value_cols if c in df_test.columns], errors='ignore')

# Prepare target variable and related columns
df_train['purchaseValue'] = df_train['purchaseValue'].fillna(0).astype(float)
df_train['made_purchase'] = (df_train['purchaseValue'] > 0).astype(int)
df_train['log_purchaseValue'] = np.log1p(df_train['purchaseValue'] / 1e6) # Scale before log

# Define features and target
X = df_train.drop(columns=['log_purchaseValue'])
y = df_train['log_purchaseValue']

# Get column names after initial feature engineering
temp_engineered_df = AdvancedFeatureEngineering().fit_transform(X)
user_level_numerical_cols = [
    'user_session_count', 'user_total_hits', 'user_avg_hits', 'user_total_pageviews',
    'user_avg_pageviews', 'user_purchase_count', 'user_total_purchase_value',
    'user_conversion_rate', 'user_avg_purchase_value'
]
session_level_numerical_cols = ['sessionNumber', 'pageViews', 'totalHits', 'hits_per_pageview']
numerical_cols = session_level_numerical_cols + user_level_numerical_cols
categorical_cols = [col for col in temp_engineered_df.columns if col not in numerical_cols and col != 'userId']
del temp_engineered_df # Clean up

print(f"\nIdentified {len(numerical_cols)} potential numerical features.")
print(f"Identified {len(categorical_cols)} potential categorical features.")

# ==============================================================================
# PART 3: ROBUST HYPERPARAMETER TUNING WITH LEAK-FREE CV
# ==============================================================================

# ==============================================================================
# PART 3: ROBUST HYPERPARAMETER TUNING (Corrected)
# ==============================================================================

def objective(trial, X_data, y_data):
    """Optuna objective function with a leak-free CV implementation."""
    params = {
        'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'random_state': 42, 'n_jobs': -1,
        'booster': 'gbtree',
        'n_estimators': trial.suggest_int('n_estimators', 500, 2000, step=100),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.05, log=True),
        'max_depth': trial.suggest_int('max_depth', 5, 12),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
        
        # --- FIX APPLIED HERE ---
        'gamma': trial.suggest_float('gamma', 0, 5),                               # Removed log=True
        'lambda': trial.suggest_float('lambda', 1, 10, log=True),                 # L2 (lambda) can be log-scaled as its lower bound > 0
        'alpha': trial.suggest_float('alpha', 0, 10),                             # L1 (alpha) must be linear-scaled to include 0
    }

    cv = GroupKFold(n_splits=4)
    # ... rest of the function remains exactly the same ...
    # ...
    groups = X_data['userId']
    cv_scores = []

    # --- THE LEAK-FREE CROSS-VALIDATION LOOP ---
    for fold, (train_idx, val_idx) in enumerate(cv.split(X_data, y_data, groups)):
        print(f"  --- Fold {fold+1}/4 ---")
        X_train, X_val = X_data.iloc[train_idx], X_data.iloc[val_idx]
        y_train, y_val = y_data.iloc[train_idx], y_data.iloc[val_idx]

        # 1. Create user aggregates ONLY from the training fold's data
        # This is the crucial step to prevent data leakage.
        user_aggs_fold = create_user_aggregates(X_train)

        # 2. Apply advanced feature engineering
        afe = AdvancedFeatureEngineering()
        X_train_eng = afe.fit_transform(X_train)
        X_val_eng = afe.transform(X_val)

        # 3. Merge aggregates into the engineered train and validation folds
        X_train_final = pd.merge(X_train_eng, user_aggs_fold, on='userId', how='left').fillna(0)
        X_val_final = pd.merge(X_val_eng, user_aggs_fold, on='userId', how='left').fillna(0)
        
        # 4. Define and fit the full preprocessing and model pipeline on the fold
        # The TargetEncoder is now correctly fitted only on train data for each fold.
        preprocessor = ColumnTransformer([
            ('num', Pipeline([('imputer', SimpleImputer(strategy='median')), ('scaler', StandardScaler())]), numerical_cols),
            ('cat', TargetEncoder(columns=categorical_cols, smoothing=trial.suggest_int('smoothing', 5, 20)), categorical_cols)
        ], remainder='drop', n_jobs=-1)
        
        model = Pipeline([
            ('preprocessor', preprocessor),
            ('regressor', xgb.XGBRegressor(**params))
        ])

        model.fit(X_train_final, y_train)
        preds = model.predict(X_val_final)
        
        # Clip predictions as log values shouldn't be negative
        preds[preds < 0] = 0
        
        rmse = np.sqrt(mean_squared_error(y_val, preds))
        cv_scores.append(rmse)
        print(f"    Fold {fold+1} RMSE: {rmse:.5f}")

    mean_rmse = np.mean(cv_scores)
    print(f"  Trial Mean RMSE: {mean_rmse:.5f}\n")
    return mean_rmse

# --- Run Optuna Study ---
N_TRIALS = 30 # For a serious competition, consider 50-100 trials
print(f"\n--- Tuning Regressor with Optuna ({N_TRIALS} trials) using LEAK-FREE CV ---")
study = optuna.create_study(direction='minimize')
study.optimize(lambda trial: objective(trial, X, y), n_trials=N_TRIALS)

print("\nBest trial:")
best_params_from_study = study.best_trial.params
print(f"  Value (RMSE): {study.best_value:.5f}")
print("  Params: ")
for key, value in best_params_from_study.items():
    print(f"    {key}: {value}")

# ==============================================================================
# PART 4: FINAL MODEL TRAINING AND SUBMISSION
# ==============================================================================

print("\n--- Training Final Model with Best Parameters on ALL Data ---")

# 1. Create user aggregates using the ENTIRE training dataset. This is correct
# for the final model as it provides the most historical data for test predictions.
final_user_aggregates = create_user_aggregates(X)

# 2. Engineer features for the full train and test sets
afe_final = AdvancedFeatureEngineering()
X_train_eng_final = afe_final.fit_transform(X)
X_test_eng_final = afe_final.transform(df_test)

# 3. Merge aggregates into both sets
X_train_final = pd.merge(X_train_eng_final, final_user_aggregates, on='userId', how='left').fillna(0)
X_test_final = pd.merge(X_test_eng_final, final_user_aggregates, on='userId', how='left').fillna(0)

# 4. Extract best parameters and build the final pipeline
final_model_params = {k: v for k, v in best_params_from_study.items() if k not in ['smoothing']}
final_smoothing = best_params_from_study.get('smoothing', 10)

final_preprocessor = ColumnTransformer([
    ('num', Pipeline([('imputer', SimpleImputer(strategy='median')), ('scaler', StandardScaler())]), numerical_cols),
    ('cat', TargetEncoder(columns=categorical_cols, smoothing=final_smoothing), categorical_cols)
], remainder='drop', n_jobs=-1)

final_pipeline = Pipeline([
    ('preprocessor', final_preprocessor),
    ('regressor', xgb.XGBRegressor(**final_model_params, objective='reg:squarederror', random_state=42, n_jobs=-1))
])

# 5. Train on ALL available training data
final_pipeline.fit(X_train_final, y)
print("Final model trained successfully.")


print("\n--- Generating Final Kaggle Submission ---")
# Align test columns with train columns to ensure consistency
train_cols = X_train_final.columns
X_test_final = X_test_final.reindex(columns=train_cols, fill_value=0)

# Predict log purchase value
kaggle_log_preds = final_pipeline.predict(X_test_final)
kaggle_log_preds[kaggle_log_preds < 0] = 0

# Transform prediction back to original scale (inverse of log1p and scaling)
kaggle_value_preds = np.expm1(kaggle_log_preds) * 1e6

# Create submission file
submission_df = pd.DataFrame({'ID': df_test.index, 'purchaseValue': kaggle_value_preds})
submission_df.to_csv('submission_robust_model11.csv', index=False)
print("Submission file 'submission_robust_model.csv' created successfully.")
print(submission_df.head())

Loading and preparing data...


[I 2025-07-16 20:15:28,711] A new study created in memory with name: no-name-41972966-7cab-470e-b0d7-ba93600ca3b3



Identified 13 potential numerical features.
Identified 38 potential categorical features.

--- Tuning Regressor with Optuna (30 trials) using LEAK-FREE CV ---
  --- Fold 1/4 ---
    Fold 1 RMSE: 1.40970
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.44570
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.41829
  --- Fold 4/4 ---


[I 2025-07-16 20:15:50,868] Trial 0 finished with value: 1.4218815558037812 and parameters: {'n_estimators': 1200, 'learning_rate': 0.04195599971217711, 'max_depth': 9, 'subsample': 0.8538678095370361, 'colsample_bytree': 0.8724003816032452, 'gamma': 3.963598082497761, 'lambda': 5.264359622322329, 'alpha': 0.35225822524924344, 'smoothing': 19}. Best is trial 0 with value: 1.4218815558037812.


    Fold 4 RMSE: 1.41384
  Trial Mean RMSE: 1.42188

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.33851
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.34275
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.34941
  --- Fold 4/4 ---


[I 2025-07-16 20:16:13,475] Trial 1 finished with value: 1.3367521185024154 and parameters: {'n_estimators': 1500, 'learning_rate': 0.010579160198661874, 'max_depth': 7, 'subsample': 0.8814832819709645, 'colsample_bytree': 0.7062772819581706, 'gamma': 3.114669885623644, 'lambda': 2.766024822102694, 'alpha': 6.146592436887045, 'smoothing': 17}. Best is trial 1 with value: 1.3367521185024154.


    Fold 4 RMSE: 1.31634
  Trial Mean RMSE: 1.33675

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.46054
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.49089
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.46619
  --- Fold 4/4 ---


[I 2025-07-16 20:16:31,608] Trial 2 finished with value: 1.4773350043115283 and parameters: {'n_estimators': 1100, 'learning_rate': 0.04741139806781882, 'max_depth': 5, 'subsample': 0.7909419373955434, 'colsample_bytree': 0.9655051456648853, 'gamma': 2.589993236444341, 'lambda': 1.8269636148759114, 'alpha': 1.0782178093395167, 'smoothing': 10}. Best is trial 1 with value: 1.3367521185024154.


    Fold 4 RMSE: 1.49171
  Trial Mean RMSE: 1.47734

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.41078
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.38381
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.38517
  --- Fold 4/4 ---


[I 2025-07-16 20:16:49,241] Trial 3 finished with value: 1.3873755298525423 and parameters: {'n_estimators': 800, 'learning_rate': 0.026619748374141754, 'max_depth': 7, 'subsample': 0.9117604772745982, 'colsample_bytree': 0.8042862468265644, 'gamma': 1.584525179529967, 'lambda': 5.189083907090946, 'alpha': 9.669486985812823, 'smoothing': 17}. Best is trial 1 with value: 1.3367521185024154.


    Fold 4 RMSE: 1.36974
  Trial Mean RMSE: 1.38738

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.48148
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.49570
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.48566
  --- Fold 4/4 ---


[I 2025-07-16 20:17:05,621] Trial 4 finished with value: 1.4904065017504546 and parameters: {'n_estimators': 500, 'learning_rate': 0.028176283489031843, 'max_depth': 9, 'subsample': 0.7172216386351448, 'colsample_bytree': 0.9053294557044307, 'gamma': 0.5921399319752096, 'lambda': 1.7377825256444945, 'alpha': 5.583035608899251, 'smoothing': 19}. Best is trial 1 with value: 1.3367521185024154.


    Fold 4 RMSE: 1.49879
  Trial Mean RMSE: 1.49041

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.42931
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.44838
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.42160
  --- Fold 4/4 ---


[I 2025-07-16 20:17:24,521] Trial 5 finished with value: 1.4381600410292341 and parameters: {'n_estimators': 1100, 'learning_rate': 0.03726843126016152, 'max_depth': 7, 'subsample': 0.8324985275660698, 'colsample_bytree': 0.8783890610189111, 'gamma': 1.8822531216345073, 'lambda': 2.9077464736129706, 'alpha': 2.0784149057673886, 'smoothing': 15}. Best is trial 1 with value: 1.3367521185024154.


    Fold 4 RMSE: 1.45335
  Trial Mean RMSE: 1.43816

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.44373
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.46446
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.45092
  --- Fold 4/4 ---


[I 2025-07-16 20:17:42,006] Trial 6 finished with value: 1.45983792166445 and parameters: {'n_estimators': 900, 'learning_rate': 0.04652062753650455, 'max_depth': 8, 'subsample': 0.8989022470733623, 'colsample_bytree': 0.9519234813640917, 'gamma': 1.3650430479929032, 'lambda': 2.1350356166666757, 'alpha': 7.748783227936427, 'smoothing': 9}. Best is trial 1 with value: 1.3367521185024154.


    Fold 4 RMSE: 1.48025
  Trial Mean RMSE: 1.45984

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.33919
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.36254
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.36307
  --- Fold 4/4 ---


[I 2025-07-16 20:18:04,763] Trial 7 finished with value: 1.3493893606570975 and parameters: {'n_estimators': 1700, 'learning_rate': 0.019696347714259477, 'max_depth': 6, 'subsample': 0.9070234570197794, 'colsample_bytree': 0.8419863436819892, 'gamma': 3.5520852855098104, 'lambda': 3.415685541837198, 'alpha': 4.61309595848039, 'smoothing': 9}. Best is trial 1 with value: 1.3367521185024154.


    Fold 4 RMSE: 1.33276
  Trial Mean RMSE: 1.34939

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.46360
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.48782
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.46458
  --- Fold 4/4 ---


[I 2025-07-16 20:18:20,658] Trial 8 finished with value: 1.4756441786095649 and parameters: {'n_estimators': 500, 'learning_rate': 0.0391908253631572, 'max_depth': 8, 'subsample': 0.939645002893345, 'colsample_bytree': 0.9878562582196669, 'gamma': 2.829825697656387, 'lambda': 6.583855913463353, 'alpha': 0.680998534891023, 'smoothing': 18}. Best is trial 1 with value: 1.3367521185024154.


    Fold 4 RMSE: 1.48657
  Trial Mean RMSE: 1.47564

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.42735
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.44495
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.43786
  --- Fold 4/4 ---


[I 2025-07-16 20:18:41,441] Trial 9 finished with value: 1.4383103395906658 and parameters: {'n_estimators': 1200, 'learning_rate': 0.012341162654139048, 'max_depth': 5, 'subsample': 0.8182158571085357, 'colsample_bytree': 0.9574796612059007, 'gamma': 3.0166832706196405, 'lambda': 1.239802385003242, 'alpha': 0.8106038862273035, 'smoothing': 19}. Best is trial 1 with value: 1.3367521185024154.


    Fold 4 RMSE: 1.44308
  Trial Mean RMSE: 1.43831

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.34329
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.35337
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.36065
  --- Fold 4/4 ---


[I 2025-07-16 20:19:06,431] Trial 10 finished with value: 1.3452803795432975 and parameters: {'n_estimators': 1700, 'learning_rate': 0.010024473173437697, 'max_depth': 12, 'subsample': 0.9857415616142366, 'colsample_bytree': 0.7085028045872818, 'gamma': 4.986901677906902, 'lambda': 9.664201772955641, 'alpha': 4.743106677244759, 'smoothing': 13}. Best is trial 1 with value: 1.3367521185024154.


    Fold 4 RMSE: 1.32381
  Trial Mean RMSE: 1.34528

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.33754
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.35955
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.35330
  --- Fold 4/4 ---


[I 2025-07-16 20:19:30,577] Trial 11 finished with value: 1.3458342329143513 and parameters: {'n_estimators': 1700, 'learning_rate': 0.010029596986505183, 'max_depth': 12, 'subsample': 0.984924373142753, 'colsample_bytree': 0.7000292677314683, 'gamma': 4.886335230320132, 'lambda': 8.913593181853683, 'alpha': 4.875299249895549, 'smoothing': 13}. Best is trial 1 with value: 1.3367521185024154.


    Fold 4 RMSE: 1.33295
  Trial Mean RMSE: 1.34583

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.32981
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.35363
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.33118
  --- Fold 4/4 ---


[I 2025-07-16 20:19:54,754] Trial 12 finished with value: 1.3326037693800346 and parameters: {'n_estimators': 2000, 'learning_rate': 0.01540385788177947, 'max_depth': 12, 'subsample': 0.9657669160377108, 'colsample_bytree': 0.7075748711332545, 'gamma': 4.967652948812618, 'lambda': 3.367077192685916, 'alpha': 6.435086012946778, 'smoothing': 5}. Best is trial 12 with value: 1.3326037693800346.


    Fold 4 RMSE: 1.31581
  Trial Mean RMSE: 1.33260

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.33845
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.36616
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.36818
  --- Fold 4/4 ---


[I 2025-07-16 20:20:19,042] Trial 13 finished with value: 1.352050133265718 and parameters: {'n_estimators': 2000, 'learning_rate': 0.015432229578158613, 'max_depth': 10, 'subsample': 0.959145813370515, 'colsample_bytree': 0.7591252033876367, 'gamma': 4.105100066094979, 'lambda': 3.2033608087893475, 'alpha': 7.048991366171288, 'smoothing': 5}. Best is trial 12 with value: 1.3326037693800346.


    Fold 4 RMSE: 1.33541
  Trial Mean RMSE: 1.35205

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.34416
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.35835
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.36640
  --- Fold 4/4 ---


[I 2025-07-16 20:20:43,158] Trial 14 finished with value: 1.3509941163104573 and parameters: {'n_estimators': 2000, 'learning_rate': 0.015517899148917727, 'max_depth': 11, 'subsample': 0.8705395062357159, 'colsample_bytree': 0.7459361855568266, 'gamma': 4.235168371501993, 'lambda': 4.337357349705002, 'alpha': 7.14329469781197, 'smoothing': 5}. Best is trial 12 with value: 1.3326037693800346.


    Fold 4 RMSE: 1.33507
  Trial Mean RMSE: 1.35099

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.47785
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.45969
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.49534
  --- Fold 4/4 ---


[I 2025-07-16 20:21:21,910] Trial 15 finished with value: 1.484842389698295 and parameters: {'n_estimators': 1500, 'learning_rate': 0.014069628761234066, 'max_depth': 10, 'subsample': 0.7764736713374789, 'colsample_bytree': 0.7774477406154716, 'gamma': 0.005544867473307846, 'lambda': 2.5676283606448087, 'alpha': 3.611254034593647, 'smoothing': 16}. Best is trial 12 with value: 1.3326037693800346.


    Fold 4 RMSE: 1.50648
  Trial Mean RMSE: 1.48484

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.35836
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.36068
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.36085
  --- Fold 4/4 ---


[I 2025-07-16 20:21:43,141] Trial 16 finished with value: 1.353381562669286 and parameters: {'n_estimators': 1500, 'learning_rate': 0.018571827936967548, 'max_depth': 7, 'subsample': 0.9420400513573571, 'colsample_bytree': 0.7294552370014072, 'gamma': 3.4504476622378735, 'lambda': 1.1082547403421585, 'alpha': 8.90393933152965, 'smoothing': 7}. Best is trial 12 with value: 1.3326037693800346.


    Fold 4 RMSE: 1.33364
  Trial Mean RMSE: 1.35338

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.38497
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.37523
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.38223
  --- Fold 4/4 ---


[I 2025-07-16 20:22:06,004] Trial 17 finished with value: 1.3760327315777041 and parameters: {'n_estimators': 1500, 'learning_rate': 0.013460244435377773, 'max_depth': 11, 'subsample': 0.8796160507473809, 'colsample_bytree': 0.8072597852400534, 'gamma': 2.1182589185239378, 'lambda': 3.9977261029809483, 'alpha': 6.246790185463784, 'smoothing': 11}. Best is trial 12 with value: 1.3326037693800346.


    Fold 4 RMSE: 1.36170
  Trial Mean RMSE: 1.37603

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.32692
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.34114
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.32801
  --- Fold 4/4 ---


[I 2025-07-16 20:22:33,199] Trial 18 finished with value: 1.3264493830394408 and parameters: {'n_estimators': 1900, 'learning_rate': 0.011164649797353439, 'max_depth': 6, 'subsample': 0.7061146990552216, 'colsample_bytree': 0.7821786922926288, 'gamma': 4.560588523006475, 'lambda': 2.388590274590411, 'alpha': 3.6949247191996317, 'smoothing': 15}. Best is trial 18 with value: 1.3264493830394408.


    Fold 4 RMSE: 1.30972
  Trial Mean RMSE: 1.32645

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.31888
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.35795
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.35101
  --- Fold 4/4 ---


[I 2025-07-16 20:22:57,186] Trial 19 finished with value: 1.336560724189468 and parameters: {'n_estimators': 1900, 'learning_rate': 0.019764509175658085, 'max_depth': 6, 'subsample': 0.7090556485514026, 'colsample_bytree': 0.7918676471388736, 'gamma': 4.652743530751559, 'lambda': 1.4640559165846607, 'alpha': 2.480821118040569, 'smoothing': 14}. Best is trial 18 with value: 1.3264493830394408.


    Fold 4 RMSE: 1.31841
  Trial Mean RMSE: 1.33656

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.32938
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.34175
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.34291
  --- Fold 4/4 ---


[I 2025-07-16 20:23:21,474] Trial 20 finished with value: 1.33148119979762 and parameters: {'n_estimators': 1800, 'learning_rate': 0.011904103421767004, 'max_depth': 10, 'subsample': 0.7371134868752788, 'colsample_bytree': 0.8287565765676573, 'gamma': 4.414502498976418, 'lambda': 2.4785051368941984, 'alpha': 3.5373805191999153, 'smoothing': 7}. Best is trial 18 with value: 1.3264493830394408.


    Fold 4 RMSE: 1.31188
  Trial Mean RMSE: 1.33148

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.32768
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.34885
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.35212
  --- Fold 4/4 ---


[I 2025-07-16 20:23:45,844] Trial 21 finished with value: 1.3351075774126058 and parameters: {'n_estimators': 1800, 'learning_rate': 0.012046174844108836, 'max_depth': 11, 'subsample': 0.745794031886441, 'colsample_bytree': 0.8399842188835014, 'gamma': 4.546439405328126, 'lambda': 2.3742874035374615, 'alpha': 3.498327079123417, 'smoothing': 7}. Best is trial 18 with value: 1.3264493830394408.


    Fold 4 RMSE: 1.31178
  Trial Mean RMSE: 1.33511

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.35105
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.36051
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.35954
  --- Fold 4/4 ---


[I 2025-07-16 20:24:12,316] Trial 22 finished with value: 1.3519998094859789 and parameters: {'n_estimators': 1900, 'learning_rate': 0.016874784348739444, 'max_depth': 10, 'subsample': 0.7508678957479391, 'colsample_bytree': 0.8227194077531431, 'gamma': 3.6393531134951163, 'lambda': 1.940792706785958, 'alpha': 3.6112042762641967, 'smoothing': 7}. Best is trial 18 with value: 1.3264493830394408.


    Fold 4 RMSE: 1.33690
  Trial Mean RMSE: 1.35200

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.33256
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.35582
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.35283
  --- Fold 4/4 ---


[I 2025-07-16 20:24:37,823] Trial 23 finished with value: 1.341474830423481 and parameters: {'n_estimators': 2000, 'learning_rate': 0.01131370324098339, 'max_depth': 12, 'subsample': 0.7016543333143395, 'colsample_bytree': 0.7545822974941401, 'gamma': 4.489918915792743, 'lambda': 3.7420200551524125, 'alpha': 2.49896351947133, 'smoothing': 6}. Best is trial 18 with value: 1.3264493830394408.


    Fold 4 RMSE: 1.32469
  Trial Mean RMSE: 1.34147

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.32632
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.35481
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.33907
  --- Fold 4/4 ---


[I 2025-07-16 20:25:02,585] Trial 24 finished with value: 1.3336448868994817 and parameters: {'n_estimators': 1800, 'learning_rate': 0.012939741175385254, 'max_depth': 9, 'subsample': 0.7366286850214929, 'colsample_bytree': 0.7729223588871575, 'gamma': 4.01556556877727, 'lambda': 1.4463534932043354, 'alpha': 4.051540363363973, 'smoothing': 11}. Best is trial 18 with value: 1.3264493830394408.


    Fold 4 RMSE: 1.31437
  Trial Mean RMSE: 1.33364

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.33774
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.36144
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.36755
  --- Fold 4/4 ---


[I 2025-07-16 20:25:25,135] Trial 25 finished with value: 1.346431982071436 and parameters: {'n_estimators': 1600, 'learning_rate': 0.014857217277014518, 'max_depth': 10, 'subsample': 0.7800127427651342, 'colsample_bytree': 0.732748921772175, 'gamma': 4.427104797901458, 'lambda': 2.2496000616519427, 'alpha': 1.7460589486817395, 'smoothing': 8}. Best is trial 18 with value: 1.3264493830394408.


    Fold 4 RMSE: 1.31900
  Trial Mean RMSE: 1.34643

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.40683
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.41229
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.41002
  --- Fold 4/4 ---


[I 2025-07-16 20:25:48,499] Trial 26 finished with value: 1.4100018750233858 and parameters: {'n_estimators': 1900, 'learning_rate': 0.02249599059431238, 'max_depth': 11, 'subsample': 0.8049631290012789, 'colsample_bytree': 0.9033858658768414, 'gamma': 4.991277747721752, 'lambda': 4.456553477296799, 'alpha': 6.05233110892186, 'smoothing': 5}. Best is trial 18 with value: 1.3264493830394408.


    Fold 4 RMSE: 1.41086
  Trial Mean RMSE: 1.41000

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.34418
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.35866
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.35841
  --- Fold 4/4 ---


[I 2025-07-16 20:26:11,477] Trial 27 finished with value: 1.349127164914503 and parameters: {'n_estimators': 1400, 'learning_rate': 0.011490661186744829, 'max_depth': 6, 'subsample': 0.7283825876019323, 'colsample_bytree': 0.8212084067677775, 'gamma': 3.7788366941846494, 'lambda': 3.0381612166347054, 'alpha': 8.118770614810177, 'smoothing': 15}. Best is trial 18 with value: 1.3264493830394408.


    Fold 4 RMSE: 1.33526
  Trial Mean RMSE: 1.34913

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.32841
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.34806
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.35662
  --- Fold 4/4 ---


[I 2025-07-16 20:26:35,263] Trial 28 finished with value: 1.3390586935320696 and parameters: {'n_estimators': 1800, 'learning_rate': 0.017636993733221595, 'max_depth': 8, 'subsample': 0.7699437342408489, 'colsample_bytree': 0.8588921661771642, 'gamma': 4.6618070837453045, 'lambda': 1.6283736584791124, 'alpha': 2.91528470370736, 'smoothing': 6}. Best is trial 18 with value: 1.3264493830394408.


    Fold 4 RMSE: 1.32314
  Trial Mean RMSE: 1.33906

  --- Fold 1/4 ---
    Fold 1 RMSE: 1.36638
  --- Fold 2/4 ---
    Fold 2 RMSE: 1.37946
  --- Fold 3/4 ---
    Fold 3 RMSE: 1.36973
  --- Fold 4/4 ---


[I 2025-07-16 20:26:56,176] Trial 29 finished with value: 1.3644570477276092 and parameters: {'n_estimators': 1300, 'learning_rate': 0.02289427641126675, 'max_depth': 9, 'subsample': 0.8427056425411141, 'colsample_bytree': 0.7826022955412402, 'gamma': 4.174132445259895, 'lambda': 5.248461355615132, 'alpha': 4.2450241885580695, 'smoothing': 10}. Best is trial 18 with value: 1.3264493830394408.


    Fold 4 RMSE: 1.34226
  Trial Mean RMSE: 1.36446


Best trial:
  Value (RMSE): 1.32645
  Params: 
    n_estimators: 1900
    learning_rate: 0.011164649797353439
    max_depth: 6
    subsample: 0.7061146990552216
    colsample_bytree: 0.7821786922926288
    gamma: 4.560588523006475
    lambda: 2.388590274590411
    alpha: 3.6949247191996317
    smoothing: 15

--- Training Final Model with Best Parameters on ALL Data ---
Final model trained successfully.

--- Generating Final Kaggle Submission ---
Submission file 'submission_robust_model.csv' created successfully.
   ID  purchaseValue
0   0   60122.062500
1   1    2306.391602
2   2    2306.391602
3   3   16237.957031
4   4    2306.391602
