In [1]:
# ==============================================================================
# VERSION 6: ROBUST, POWERFUL, FROM-SCRATCH SCRIPT
# - Single, powerful Tweedie Regressor.
# - Extensive feature engineering (User-level, Lag, Rolling).
# - K-Fold ensemble training for stability and performance.
# - Pre-selected robust hyperparameters (no Optuna).
# - Includes TQDM progress bar.
# - Written for maximum library compatibility.
# ==============================================================================

import pandas as pd
import numpy as np
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import r2_score
import xgboost as xgb
from tqdm.notebook import tqdm  # Use tqdm.notebook for Jupyter, or just tqdm for scripts
import gc
import warnings

warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

# --- 1. CONFIGURATION ---
print("--- CONFIGURATION ---")
TRAIN_FILE_PATH = '/Users/shrinarayan/Desktop/Prediction-PurchaseValues/dataset/train_data.csv'
TEST_FILE_PATH = '/Users/shrinarayan/Desktop/Prediction-PurchaseValues/dataset/test_data.csv'
N_SPLITS = 5  # Using 5 folds for our robust training
# A pre-selected, robust set of hyperparameters for the Tweedie Regressor
XGB_PARAMS = {
    'objective': 'reg:tweedie',
    'tweedie_variance_power': 1.6,
    'n_estimators': 1500,
    'learning_rate': 0.02,
    'max_depth': 8,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'min_child_weight': 5,
    'eval_metric': 'rmse',
    'random_state': 42,
    'n_jobs': -1,
}

# --- 2. FEATURE ENGINEERING FUNCTION ---
def create_all_features(df):
    """Master function to create all features on the combined dataframe."""
    print("  > Starting feature engineering...")
    
    # Date & Time Features
    df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    df['sessionHour'] = pd.to_datetime(df['sessionStart'], unit='s').dt.hour
    df['sessionDayOfWeek'] = df['date'].dt.dayofweek
    df['sessionMonth'] = df['date'].dt.month
    
    # Interaction & Ratio Features
    df['hits_per_pageview'] = df['totalHits'] / (df['pageViews'].fillna(0) + 1)
    
    # User-Level Aggregates
    df['userId'] = df['userId'].astype(str)
    user_agg = df.groupby('userId').agg(
        user_total_hits=('totalHits', 'sum'),
        user_avg_hits=('totalHits', 'mean'),
        user_session_count=('sessionId', 'nunique'),
        user_unique_days=('date', 'nunique')
    )
    user_agg['user_avg_sessions_per_day'] = user_agg['user_session_count'] / user_agg['user_unique_days']
    df = pd.merge(df, user_agg, on='userId', how='left')

    # Time-Series Features (Lag & Rolling)
    df_sorted = df.sort_values(['userId', 'date', 'sessionStart']).copy()
    df_sorted['time_since_last_session'] = df_sorted.groupby('userId')['sessionStart'].diff()
    df_sorted['user_rolling_avg_hits_3'] = df_sorted.groupby('userId')['totalHits'].transform(
        lambda s: s.rolling(3, min_periods=1).mean()
    )
    
    # Drop temporary/redundant columns
    cols_to_drop = ['date', 'sessionStart', 'sessionId']
    df_sorted = df_sorted.drop(columns=cols_to_drop, errors='ignore')
    
    print("  > Feature engineering complete.")
    gc.collect()
    return df_sorted

# --- 3. DATA LOADING & PREPARATION ---
print("\n--- DATA LOADING & PREPARATION ---")
df_train = pd.read_csv(TRAIN_FILE_PATH, dtype={'fullVisitorId': 'str'})
df_test = pd.read_csv(TEST_FILE_PATH, dtype={'fullVisitorId': 'str'})

test_indices = df_test.index

one_value_cols = [col for col in df_train.columns if df_train[col].nunique(dropna=False) == 1]
df_train = df_train.drop(columns=one_value_cols)
df_test = df_test.drop(columns=[c for c in one_value_cols if c in df_test.columns], errors='ignore')

train_len = len(df_train)
combined_df = pd.concat([df_train, df_test], axis=0, sort=False)
combined_df_featured = create_all_features(combined_df)

# Handle NaNs in the target variable AFTER feature creation
combined_df_featured['purchaseValue'] = combined_df_featured['purchaseValue'].fillna(0)

# Split back into train and test
X = combined_df_featured[:train_len].drop(columns=['purchaseValue'])
y = combined_df_featured[:train_len]['purchaseValue'] / 1e6 # Scale target
X_test = combined_df_featured[train_len:].drop(columns=['purchaseValue'])

# --- 4. PREPROCESSING SETUP ---
print("\n--- PREPROCESSING SETUP ---")
# Define column types
numerical_cols = X.select_dtypes(include=np.number).columns.tolist()
categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
categorical_cols.remove('userId') # For grouping only

# Enforce string type on all categorical columns to prevent mixed-type errors
for col in categorical_cols:
    X[col] = X[col].astype(str)
    X_test[col] = X_test[col].astype(str)

# Create the master preprocessor
preprocessor = ColumnTransformer([
    ('num', Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]), numerical_cols),
    ('cat', Pipeline([
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('ohe', OneHotEncoder(handle_unknown='ignore')) # Compatible with older sklearn
    ]), categorical_cols)
], remainder='drop') # Drop any columns not specified (like userId)

# --- 5. MODEL TRAINING ---
print("\n--- MODEL TRAINING (K-FOLD ENSEMBLE) ---")
test_predictions = np.zeros(len(X_test))
oof_predictions = np.zeros(len(X))
gkf = GroupKFold(n_splits=N_SPLITS)

for fold, (train_idx, val_idx) in enumerate(tqdm(gkf.split(X, y, groups=X['userId']), total=N_SPLITS, desc="Training Folds")):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # Fit preprocessor on this fold's training data and transform all sets
    X_train_processed = preprocessor.fit_transform(X_train)
    X_val_processed = preprocessor.transform(X_val)
    
    model = xgb.XGBRegressor(**XGB_PARAMS, random_state=42 + fold)
    model.fit(X_train_processed, y_train)
              
    val_preds = model.predict(X_val_processed)
    # ROBUSTNESS: Convert any potential NaN predictions from the model to 0
    val_preds = np.nan_to_num(val_preds, nan=0.0)
    oof_predictions[val_idx] = val_preds
    
    # Predict on the test set for this fold
    X_test_processed = preprocessor.transform(X_test)
    test_fold_preds = model.predict(X_test_processed)
    test_fold_preds = np.nan_to_num(test_fold_preds, nan=0.0)
    test_predictions += test_fold_preds / N_SPLITS
    
    gc.collect()

# --- 6. EVALUATION & SUBMISSION ---
print("\n--- EVALUATION & SUBMISSION ---")
oof_r2 = r2_score(y, oof_predictions)
print(f"Overall Out-of-Fold R2 Score: {oof_r2:.5f}")

# Finalize predictions
test_predictions[test_predictions < 0] = 0
final_predictions_scaled = test_predictions * 1e6

# Create submission file
submission_df = pd.DataFrame({'ID': test_indices, 'purchaseValue': final_predictions_scaled})
submission_df.to_csv('submission_robust_final.csv', index=False)
print("Submission file 'submission_robust_final.csv' created successfully.")
print(submission_df.head())

--- CONFIGURATION ---

--- DATA LOADING & PREPARATION ---
  > Starting feature engineering...
  > Feature engineering complete.

--- PREPROCESSING SETUP ---

--- MODEL TRAINING (K-FOLD ENSEMBLE) ---


ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html

In [2]:
# ==============================================================================
# VERSION 7: FINAL ROBUST SCRIPT
# - Uses standard `tqdm` for universal compatibility.
# - Single, powerful Tweedie Regressor with robust pre-set parameters.
# - Extensive feature engineering.
# - K-Fold ensemble training for stability.
# ==============================================================================

import pandas as pd
import numpy as np
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import r2_score
import xgboost as xgb
from tqdm import tqdm  # !!!!! MAJOR CHANGE HERE: Using the standard, universal tqdm !!!!!
import gc
import warnings

warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

# --- 1. CONFIGURATION ---
print("--- CONFIGURATION ---")
TRAIN_FILE_PATH = '/Users/shrinarayan/Desktop/Prediction-PurchaseValues/dataset/train_data.csv'
TEST_FILE_PATH = '/Users/shrinarayan/Desktop/Prediction-PurchaseValues/dataset/test_data.csv'
N_SPLITS = 5  # Using 5 folds for our robust training
XGB_PARAMS = {
    'objective': 'reg:tweedie',
    'tweedie_variance_power': 1.6,
    'n_estimators': 1500,
    'learning_rate': 0.02,
    'max_depth': 8,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'min_child_weight': 5,
    'eval_metric': 'rmse',
    'random_state': 42,
    'n_jobs': -1,
}

# --- 2. FEATURE ENGINEERING FUNCTION ---
def create_all_features(df):
    """Master function to create all features on the combined dataframe."""
    print("  > Starting feature engineering...")
    
    # Date & Time Features
    df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    df['sessionHour'] = pd.to_datetime(df['sessionStart'], unit='s').dt.hour
    df['sessionDayOfWeek'] = df['date'].dt.dayofweek
    df['sessionMonth'] = df['date'].dt.month
    
    # Interaction & Ratio Features
    df['hits_per_pageview'] = df['totalHits'] / (df['pageViews'].fillna(0) + 1)
    
    # User-Level Aggregates
    df['userId'] = df['userId'].astype(str)
    user_agg = df.groupby('userId').agg(
        user_total_hits=('totalHits', 'sum'),
        user_avg_hits=('totalHits', 'mean'),
        user_session_count=('sessionId', 'nunique'),
        user_unique_days=('date', 'nunique')
    )
    user_agg['user_avg_sessions_per_day'] = user_agg['user_session_count'] / user_agg['user_unique_days']
    df = pd.merge(df, user_agg, on='userId', how='left')

    # Time-Series Features (Lag & Rolling)
    df_sorted = df.sort_values(['userId', 'date', 'sessionStart']).copy()
    df_sorted['time_since_last_session'] = df_sorted.groupby('userId')['sessionStart'].diff()
    df_sorted['user_rolling_avg_hits_3'] = df_sorted.groupby('userId')['totalHits'].transform(
        lambda s: s.rolling(3, min_periods=1).mean()
    )
    
    # Drop temporary/redundant columns
    cols_to_drop = ['date', 'sessionStart', 'sessionId']
    df_sorted = df_sorted.drop(columns=cols_to_drop, errors='ignore')
    
    print("  > Feature engineering complete.")
    gc.collect()
    return df_sorted

# --- 3. DATA LOADING & PREPARATION ---
print("\n--- DATA LOADING & PREPARATION ---")
df_train = pd.read_csv(TRAIN_FILE_PATH, dtype={'fullVisitorId': 'str'})
df_test = pd.read_csv(TEST_FILE_PATH, dtype={'fullVisitorId': 'str'})

test_indices = df_test.index

one_value_cols = [col for col in df_train.columns if df_train[col].nunique(dropna=False) == 1]
df_train = df_train.drop(columns=one_value_cols)
df_test = df_test.drop(columns=[c for c in one_value_cols if c in df_test.columns], errors='ignore')

train_len = len(df_train)
combined_df = pd.concat([df_train, df_test], axis=0, sort=False)
combined_df_featured = create_all_features(combined_df)

# Handle NaNs in the target variable AFTER feature creation
combined_df_featured['purchaseValue'] = combined_df_featured['purchaseValue'].fillna(0)

# Split back into train and test
X = combined_df_featured[:train_len].drop(columns=['purchaseValue'])
y = combined_df_featured[:train_len]['purchaseValue'] / 1e6 # Scale target
X_test = combined_df_featured[train_len:].drop(columns=['purchaseValue'])

# --- 4. PREPROCESSING SETUP ---
print("\n--- PREPROCESSING SETUP ---")
# Define column types
numerical_cols = X.select_dtypes(include=np.number).columns.tolist()
categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
categorical_cols.remove('userId') # For grouping only

# Enforce string type on all categorical columns to prevent mixed-type errors
for col in categorical_cols:
    X[col] = X[col].astype(str)
    X_test[col] = X_test[col].astype(str)

# Create the master preprocessor
preprocessor = ColumnTransformer([
    ('num', Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]), numerical_cols),
    ('cat', Pipeline([
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('ohe', OneHotEncoder(handle_unknown='ignore'))
    ]), categorical_cols)
], remainder='drop') # Drop any columns not specified (like userId)

# --- 5. MODEL TRAINING ---
print("\n--- MODEL TRAINING (K-FOLD ENSEMBLE) ---")
test_predictions = np.zeros(len(X_test))
oof_predictions = np.zeros(len(X))
gkf = GroupKFold(n_splits=N_SPLITS)

for fold, (train_idx, val_idx) in enumerate(tqdm(gkf.split(X, y, groups=X['userId']), total=N_SPLITS, desc="Training Folds")):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # Fit preprocessor on this fold's training data and transform all sets
    X_train_processed = preprocessor.fit_transform(X_train)
    X_val_processed = preprocessor.transform(X_val)
    
    model = xgb.XGBRegressor(**XGB_PARAMS, random_state=42 + fold)
    model.fit(X_train_processed, y_train)
              
    val_preds = model.predict(X_val_processed)
    # ROBUSTNESS: Convert any potential NaN predictions from the model to 0
    val_preds = np.nan_to_num(val_preds, nan=0.0)
    oof_predictions[val_idx] = val_preds
    
    # Predict on the test set for this fold
    X_test_processed = preprocessor.transform(X_test)
    test_fold_preds = model.predict(X_test_processed)
    test_fold_preds = np.nan_to_num(test_fold_preds, nan=0.0)
    test_predictions += test_fold_preds / N_SPLITS
    
    gc.collect()

# --- 6. EVALUATION & SUBMISSION ---
print("\n--- EVALUATION & SUBMISSION ---")
oof_r2 = r2_score(y, oof_predictions)
print(f"Overall Out-of-Fold R2 Score: {oof_r2:.5f}")

# Finalize predictions
test_predictions[test_predictions < 0] = 0
final_predictions_scaled = test_predictions * 1e6

# Create submission file
submission_df = pd.DataFrame({'ID': test_indices, 'purchaseValue': final_predictions_scaled})
submission_df.to_csv('submission_robust_final.csv', index=False)
print("Submission file 'submission_robust_final.csv' created successfully.")
print(submission_df.head())

--- CONFIGURATION ---

--- DATA LOADING & PREPARATION ---
  > Starting feature engineering...
  > Feature engineering complete.

--- PREPROCESSING SETUP ---

--- MODEL TRAINING (K-FOLD ENSEMBLE) ---


Training Folds:   0%|          | 0/5 [00:01<?, ?it/s]


TypeError: xgboost.sklearn.XGBRegressor() got multiple values for keyword argument 'random_state'

In [None]:
# ==============================================================================
# VERSION 7.1: FINAL ROBUST SCRIPT (Corrected)
# - Fixed "multiple values for keyword argument" TypeError.
# - This is the final, robust, and logically sound version.
# ==============================================================================

import pandas as pd
import numpy as np
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import r2_score
import xgboost as xgb
from tqdm import tqdm
import gc
import warnings

warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

# --- 1. CONFIGURATION ---
print("--- CONFIGURATION ---")
TRAIN_FILE_PATH = '/Users/shrinarayan/Desktop/Prediction-PurchaseValues/dataset/train_data.csv'
TEST_FILE_PATH = '/Users/shrinarayan/Desktop/Prediction-PurchaseValues/dataset/test_data.csv'
N_SPLITS = 5  # Using 5 folds for our robust training

# !!!!! MAJOR CHANGE HERE: Removed 'random_state' from the dictionary !!!!!
XGB_PARAMS = {
    'objective': 'reg:tweedie',
    'tweedie_variance_power': 1.6,
    'n_estimators': 1500,
    'learning_rate': 0.02,
    'max_depth': 8,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'min_child_weight': 5,
    'eval_metric': 'rmse',
    'n_jobs': -1,
}

# --- 2. FEATURE ENGINEERING FUNCTION ---
def create_all_features(df):
    """Master function to create all features on the combined dataframe."""
    print("  > Starting feature engineering...")
    
    # Date & Time Features
    df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    df['sessionHour'] = pd.to_datetime(df['sessionStart'], unit='s').dt.hour
    df['sessionDayOfWeek'] = df['date'].dt.dayofweek
    df['sessionMonth'] = df['date'].dt.month
    
    # Interaction & Ratio Features
    df['hits_per_pageview'] = df['totalHits'] / (df['pageViews'].fillna(0) + 1)
    
    # User-Level Aggregates
    df['userId'] = df['userId'].astype(str)
    user_agg = df.groupby('userId').agg(
        user_total_hits=('totalHits', 'sum'),
        user_avg_hits=('totalHits', 'mean'),
        user_session_count=('sessionId', 'nunique'),
        user_unique_days=('date', 'nunique')
    )
    user_agg['user_avg_sessions_per_day'] = user_agg['user_session_count'] / user_agg['user_unique_days']
    df = pd.merge(df, user_agg, on='userId', how='left')

    # Time-Series Features (Lag & Rolling)
    df_sorted = df.sort_values(['userId', 'date', 'sessionStart']).copy()
    df_sorted['time_since_last_session'] = df_sorted.groupby('userId')['sessionStart'].diff()
    df_sorted['user_rolling_avg_hits_3'] = df_sorted.groupby('userId')['totalHits'].transform(
        lambda s: s.rolling(3, min_periods=1).mean()
    )
    
    # Drop temporary/redundant columns
    cols_to_drop = ['date', 'sessionStart', 'sessionId']
    df_sorted = df_sorted.drop(columns=cols_to_drop, errors='ignore')
    
    print("  > Feature engineering complete.")
    gc.collect()
    return df_sorted

# --- 3. DATA LOADING & PREPARATION ---
print("\n--- DATA LOADING & PREPARATION ---")
df_train = pd.read_csv(TRAIN_FILE_PATH, dtype={'fullVisitorId': 'str'})
df_test = pd.read_csv(TEST_FILE_PATH, dtype={'fullVisitorId': 'str'})

test_indices = df_test.index

one_value_cols = [col for col in df_train.columns if df_train[col].nunique(dropna=False) == 1]
df_train = df_train.drop(columns=one_value_cols)
df_test = df_test.drop(columns=[c for c in one_value_cols if c in df_test.columns], errors='ignore')

train_len = len(df_train)
combined_df = pd.concat([df_train, df_test], axis=0, sort=False)
combined_df_featured = create_all_features(combined_df)

# Handle NaNs in the target variable AFTER feature creation
combined_df_featured['purchaseValue'] = combined_df_featured['purchaseValue'].fillna(0)

# Split back into train and test
X = combined_df_featured[:train_len].drop(columns=['purchaseValue'])
y = combined_df_featured[:train_len]['purchaseValue'] / 1e6 # Scale target
X_test = combined_df_featured[train_len:].drop(columns=['purchaseValue'])

# --- 4. PREPROCESSING SETUP ---
print("\n--- PREPROCESSING SETUP ---")
# Define column types
numerical_cols = X.select_dtypes(include=np.number).columns.tolist()
categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
categorical_cols.remove('userId') # For grouping only

# Enforce string type on all categorical columns to prevent mixed-type errors
for col in categorical_cols:
    X[col] = X[col].astype(str)
    X_test[col] = X_test[col].astype(str)

# Create the master preprocessor
preprocessor = ColumnTransformer([
    ('num', Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]), numerical_cols),
    ('cat', Pipeline([
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('ohe', OneHotEncoder(handle_unknown='ignore'))
    ]), categorical_cols)
], remainder='drop') # Drop any columns not specified (like userId)

# --- 5. MODEL TRAINING ---
print("\n--- MODEL TRAINING (K-FOLD ENSEMBLE) ---")
test_predictions = np.zeros(len(X_test))
oof_predictions = np.zeros(len(X))
gkf = GroupKFold(n_splits=N_SPLITS)

for fold, (train_idx, val_idx) in enumerate(tqdm(gkf.split(X, y, groups=X['userId']), total=N_SPLITS, desc="Training Folds")):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # Fit preprocessor on this fold's training data and transform all sets
    X_train_processed = preprocessor.fit_transform(X_train)
    X_val_processed = preprocessor.transform(X_val)
    
    # The 'random_state' is now correctly passed only once
    model = xgb.XGBRegressor(**XGB_PARAMS, random_state=42 + fold)
    model.fit(X_train_processed, y_train)
              
    val_preds = model.predict(X_val_processed)
    val_preds = np.nan_to_num(val_preds, nan=0.0)
    oof_predictions[val_idx] = val_preds
    
    # Predict on the test set for this fold
    X_test_processed = preprocessor.transform(X_test)
    test_fold_preds = model.predict(X_test_processed)
    test_fold_preds = np.nan_to_num(test_fold_preds, nan=0.0)
    test_predictions += test_fold_preds / N_SPLITS
    
    gc.collect()

# --- 6. EVALUATION & SUBMISSION ---
print("\n--- EVALUATION & SUBMISSION ---")
oof_r2 = r2_score(y, oof_predictions)
print(f"Overall Out-of-Fold R2 Score: {oof_r2:.5f}")

# Finalize predictions
test_predictions[test_predictions < 0] = 0
final_predictions_scaled = test_predictions * 1e6

# Create submission file
submission_df = pd.DataFrame({'ID': test_indices, 'purchaseValue': final_predictions_scaled})
submission_df.to_csv('submission_robust_final.csv', index=False)
print("Submission file 'submission_robust_final.csv' created successfully.")
print(submission_df.head())

--- CONFIGURATION ---

--- DATA LOADING & PREPARATION ---
  > Starting feature engineering...


Exception ignored in: <function tqdm.__del__ at 0x14d5ea040>
Traceback (most recent call last):
  File "/Users/shrinarayan/Desktop/Prediction-PurchaseValues/venv/lib/python3.9/site-packages/tqdm/std.py", line 1148, in __del__
    self.close()
  File "/Users/shrinarayan/Desktop/Prediction-PurchaseValues/venv/lib/python3.9/site-packages/tqdm/notebook.py", line 279, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


  > Feature engineering complete.

--- PREPROCESSING SETUP ---

--- MODEL TRAINING (K-FOLD ENSEMBLE) ---


Training Folds: 100%|██████████| 5/5 [01:08<00:00, 13.76s/it]


--- EVALUATION & SUBMISSION ---
Overall Out-of-Fold R2 Score: 0.05332
Submission file 'submission_robust_final.csv' created successfully.
   ID  purchaseValue
0   0   1.406660e-07
1   1   2.875419e-05
2   2   1.180671e-05
3   3   1.739838e-05
4   4   1.378051e-05



