In [1]:
import numpy as np
import pandas as pd
import os
import pandas as pd
import sys

# Config

In [2]:
HYPER_OPT = True 
HYPER_OPT_TIME = 3600*11
USE_ORIGINAL_DATA = True 
SUBMIT_TO_KAGGLE = False

In [3]:
# Define the base directory (where the notebook is running)
base_dir = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))

# Define the data directory
data_dir = os.path.join(base_dir, "data")

model_dir = os.path.join(base_dir, "models")

sys.path.append(base_dir)

# Data Extraction

In [4]:

# # Construct file paths
# train_file = os.path.join(data_dir, "train.csv")
# test_file = os.path.join(data_dir, "test.csv")
# original_file = os.path.join(data_dir, "Insurance Premium Prediction Dataset.csv")

# # Load the datasets
# train_df = pd.read_csv(train_file, index_col="id")
# test_df = pd.read_csv(test_file, index_col="id")
# original_df = pd.read_csv(original_file)

In [5]:
from axyom_utilities.data_extraction import extract_data

X_train, y_train, X_test, X_orig, y_orig = extract_data(data_dir, USE_ORIGINAL_DATA)

# Data Cleaning

In [6]:
from axyom_utilities.preprocessing import preprocess 

#original_df = original_df.dropna(subset=["Premium Amount"])

# train_df = preprocess(train_df)
# test_df = preprocess(test_df)
# original_df = preprocess(original_df)

X_train = preprocess(X_train)
X_test = preprocess(X_test)
if USE_ORIGINAL_DATA:
    X_orig = preprocess(X_orig)

# HyperOpt

In [7]:
import json
from axyom_utilities.wrappers import XGBRegressorWrapper
from axyom_utilities.training import train_model_cv
import optuna
import torch
from optuna.samplers import TPESampler
from optuna.visualization.matplotlib import (
    plot_optimization_history, 
    plot_param_importances, 
    plot_parallel_coordinate,
    plot_slice,
    plot_contour
)
import matplotlib.pyplot as plt

if HYPER_OPT:
    fixed_params = {
        "n_estimators": 10000,
        "objective": "reg:squarederror",  # XGBoost regression objective
        "tree_method": "gpu_hist" if torch.cuda.is_available() else "auto",
        "verbosity": 0,
        "enable_categorical": True
    }
    
    # Define the Optuna objective function
    def objective(trial):
        # Define hyperparameter space
        varying_params = {
            "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.3, log=True),
            "max_depth": trial.suggest_int("max_depth", 8, 15),
            "min_child_weight": trial.suggest_float("min_child_weight", 1e-3, 50, log=True),
            "subsample": trial.suggest_float("subsample", 0.4, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.4, 1.0),
            "gamma": trial.suggest_float("gamma", 0, 5),
            "lambda": trial.suggest_float("lambda", 1e-3, 10, log=True),
            "alpha": trial.suggest_float("alpha", 1e-3, 10, log=True)
        }

        xgb_params = {**fixed_params, **varying_params}
        
        # Initialize XGBRegressor with trial parameters
        model = XGBRegressorWrapper(**xgb_params)
        
        # Evaluate using K-Fold CV with early stopping
        results = train_model_cv(\
            model, 
            X_train, 
            y_train, 
            X_test, 
            X_orig,
            y_orig,
            cv_splits=5, 
            early_stopping_rounds=50
        )
        score = results['cv_scores'].mean()

        trial.set_user_attr("best_iteration", results['best_iteration'])
        
        return score
    
    # Prepare data
    # Replace X_train, y_train, and X_test with your data
    # Example:
    # X_train, X_test, y_train = ...
    
    # Run Optuna optimization
    study = optuna.create_study( \
        direction="minimize", 
        study_name="XGB_v1", 
        storage="sqlite:///xgb_study_v1.db", 
        load_if_exists=True,
        sampler=TPESampler(seed=666)
    )
    study.optimize(objective, n_trials=100, timeout=HYPER_OPT_TIME)
    
    # Best parameters and result
    print("Best Trial: ", study.best_trial.params)
    print("Best RMSE: ", study.best_value)

    xgb_best_params = {**fixed_params, **study.best_trial.params}

    xgb_best_params["n_estimators"] = study.best_trial.user_attrs.get("best_iteration", None)

    with open("xgb_best_params.json", "w") as f:
        json.dump(xgb_best_params, f, indent=4)
   
    plot_optimization_history(study)
    plt.show()
    
    plot_param_importances(study)
    plt.show()
    
    plot_slice(study)
    plt.show()

else:
    xgb_best_params = {
        'n_estimators': 2225,
        'objective': 'reg:squarederror',
        'tree_method': 'gpu_hist',
        'verbosity': 0,
        'enable_categorical': True,
        'learning_rate': 0.003059929305190928,
        'max_depth': 8,
        'min_child_weight': 12.496270561250991,
        'subsample': 0.8428246186530037,
        'colsample_bytree': 0.9999895920675128,
        'gamma': 2.937438656382514,
        'lambda': 1.5752155403171972,
        'alpha': 0.4038060866963702
    }


[I 2024-12-16 21:51:16,367] Using an existing study with name 'XGB_v1' instead of creating a new one.


Training fold 1...
Fold 1 RMSE: 1.0499
Training fold 2...
Fold 2 RMSE: 1.0488
Training fold 3...
Fold 3 RMSE: 1.0473
Training fold 4...
Fold 4 RMSE: 1.0503
Training fold 5...


[I 2024-12-16 21:53:29,427] Trial 2 finished with value: 1.048994163843117 and parameters: {'learning_rate': 0.05433350003437882, 'max_depth': 14, 'min_child_weight': 1.509804001525835, 'subsample': 0.8367148343488449, 'colsample_bytree': 0.9708747744678036, 'gamma': 0.06351598517383816, 'lambda': 0.04511809923805299, 'alpha': 0.0015676574662308934}. Best is trial 2 with value: 1.048994163843117.


Fold 5 RMSE: 1.0487
Mean CV RMSE: 1.0490 ± 0.0010
Training fold 1...
Fold 1 RMSE: 1.0488
Training fold 2...
Fold 2 RMSE: 1.0477
Training fold 3...
Fold 3 RMSE: 1.0463
Training fold 4...
Fold 4 RMSE: 1.0488
Training fold 5...
Fold 5 RMSE: 1.0475
Mean CV RMSE: 1.0478 ± 0.0009


[I 2024-12-16 22:26:00,832] Trial 3 finished with value: 1.0478290056562236 and parameters: {'learning_rate': 0.001768215377734423, 'max_depth': 12, 'min_child_weight': 0.008728853020978604, 'subsample': 0.8464925012884582, 'colsample_bytree': 0.5157352018243503, 'gamma': 3.5042237608299063, 'lambda': 0.014890607747597583, 'alpha': 1.2529040614881077}. Best is trial 3 with value: 1.0478290056562236.


Training fold 1...
Fold 1 RMSE: 1.0549
Training fold 2...
Fold 2 RMSE: 1.0536
Training fold 3...
Fold 3 RMSE: 1.0522
Training fold 4...
Fold 4 RMSE: 1.0555
Training fold 5...


[I 2024-12-16 22:42:36,427] Trial 4 finished with value: 1.0540149557096832 and parameters: {'learning_rate': 0.0010295684202914674, 'max_depth': 8, 'min_child_weight': 0.003321757899981517, 'subsample': 0.5486009372170182, 'colsample_bytree': 0.4139417795433424, 'gamma': 3.6366057711758453, 'lambda': 0.02291605035157054, 'alpha': 0.006166129274351907}. Best is trial 3 with value: 1.0478290056562236.


Fold 5 RMSE: 1.0538
Mean CV RMSE: 1.0540 ± 0.0011
Training fold 1...
Fold 1 RMSE: 1.0602
Training fold 2...
Fold 2 RMSE: 1.0588
Training fold 3...
Fold 3 RMSE: 1.0581
Training fold 4...
Fold 4 RMSE: 1.0609
Training fold 5...


[I 2024-12-16 22:44:09,563] Trial 5 finished with value: 1.0593466521559864 and parameters: {'learning_rate': 0.17870968683687566, 'max_depth': 15, 'min_child_weight': 0.318876983451918, 'subsample': 0.5554791096213735, 'colsample_bytree': 0.7502875712951831, 'gamma': 1.6284532646680434, 'lambda': 3.594158746720598, 'alpha': 0.3203451159453505}. Best is trial 3 with value: 1.0478290056562236.


Fold 5 RMSE: 1.0588
Mean CV RMSE: 1.0593 ± 0.0010
Training fold 1...
Fold 1 RMSE: 1.0510
Training fold 2...
Fold 2 RMSE: 1.0499
Training fold 3...
Fold 3 RMSE: 1.0489
Training fold 4...
Fold 4 RMSE: 1.0511
Training fold 5...


[I 2024-12-16 22:45:08,919] Trial 6 finished with value: 1.0501439351811883 and parameters: {'learning_rate': 0.10677004453213144, 'max_depth': 12, 'min_child_weight': 0.09080655845269026, 'subsample': 0.8458283150473813, 'colsample_bytree': 0.6217578285385542, 'gamma': 0.375832719735959, 'lambda': 1.261165012072524, 'alpha': 0.007544613176679852}. Best is trial 3 with value: 1.0478290056562236.


Fold 5 RMSE: 1.0499
Mean CV RMSE: 1.0501 ± 0.0008
Training fold 1...
Fold 1 RMSE: 1.0487
Training fold 2...
Fold 2 RMSE: 1.0475
Training fold 3...
Fold 3 RMSE: 1.0460
Training fold 4...
Fold 4 RMSE: 1.0485
Training fold 5...
Fold 5 RMSE: 1.0473
Mean CV RMSE: 1.0476 ± 0.0010


[I 2024-12-16 23:32:24,295] Trial 7 finished with value: 1.0475861522959087 and parameters: {'learning_rate': 0.001572316850677651, 'max_depth': 11, 'min_child_weight': 0.005273622710529421, 'subsample': 0.8970790799514776, 'colsample_bytree': 0.5148211407627984, 'gamma': 1.352044768751754, 'lambda': 0.17544366233758285, 'alpha': 4.069317345142579}. Best is trial 7 with value: 1.0475861522959087.


Training fold 1...
Fold 1 RMSE: 1.0525
Training fold 2...
Fold 2 RMSE: 1.0516
Training fold 3...
Fold 3 RMSE: 1.0500
Training fold 4...
Fold 4 RMSE: 1.0520
Training fold 5...


[I 2024-12-16 23:33:00,185] Trial 8 finished with value: 1.0514551495253919 and parameters: {'learning_rate': 0.1288199143020136, 'max_depth': 11, 'min_child_weight': 0.0706208895177166, 'subsample': 0.40973230799046567, 'colsample_bytree': 0.5795280231067159, 'gamma': 1.7688910936390445, 'lambda': 3.749822076655429, 'alpha': 1.394910288740228}. Best is trial 7 with value: 1.0475861522959087.


Fold 5 RMSE: 1.0512
Mean CV RMSE: 1.0515 ± 0.0008
Training fold 1...
Fold 1 RMSE: 1.0503
Training fold 2...
Fold 2 RMSE: 1.0490
Training fold 3...
Fold 3 RMSE: 1.0480
Training fold 4...
Fold 4 RMSE: 1.0502
Training fold 5...


[I 2024-12-16 23:33:37,965] Trial 9 finished with value: 1.0493600828501675 and parameters: {'learning_rate': 0.08143647168295229, 'max_depth': 11, 'min_child_weight': 4.431174429966464, 'subsample': 0.6785848861398642, 'colsample_bytree': 0.5090620994307342, 'gamma': 4.420128023795511, 'lambda': 0.7501862009222005, 'alpha': 0.48699578287890244}. Best is trial 7 with value: 1.0475861522959087.


Fold 5 RMSE: 1.0494
Mean CV RMSE: 1.0494 ± 0.0008
Training fold 1...
Fold 1 RMSE: 1.0474
Training fold 2...
Fold 2 RMSE: 1.0463
Training fold 3...
Fold 3 RMSE: 1.0449
Training fold 4...
Fold 4 RMSE: 1.0475
Training fold 5...
Fold 5 RMSE: 1.0462
Mean CV RMSE: 1.0465 ± 0.0009


[I 2024-12-16 23:43:56,586] Trial 10 finished with value: 1.04645564144077 and parameters: {'learning_rate': 0.004320551911885695, 'max_depth': 11, 'min_child_weight': 0.0011948540563670102, 'subsample': 0.5409962960406868, 'colsample_bytree': 0.7067027866999082, 'gamma': 1.460046202059528, 'lambda': 0.10175928165655608, 'alpha': 0.09842207562639121}. Best is trial 10 with value: 1.04645564144077.


Training fold 1...
Fold 1 RMSE: 1.0464
Training fold 2...
Fold 2 RMSE: 1.0453
Training fold 3...
Fold 3 RMSE: 1.0440
Training fold 4...
Fold 4 RMSE: 1.0464
Training fold 5...
Fold 5 RMSE: 1.0453
Mean CV RMSE: 1.0455 ± 0.0009


[I 2024-12-16 23:55:56,057] Trial 11 finished with value: 1.0454830269442963 and parameters: {'learning_rate': 0.0018074015405110902, 'max_depth': 11, 'min_child_weight': 35.82538209022429, 'subsample': 0.8430826729425029, 'colsample_bytree': 0.8317303646698226, 'gamma': 4.465216932504686, 'lambda': 7.090856965711639, 'alpha': 0.00614046006689163}. Best is trial 11 with value: 1.0454830269442963.


Training fold 1...
Fold 1 RMSE: 1.0466
Training fold 2...
Fold 2 RMSE: 1.0454
Training fold 3...
Fold 3 RMSE: 1.0441
Training fold 4...
Fold 4 RMSE: 1.0467
Training fold 5...


[I 2024-12-16 23:57:09,406] Trial 12 finished with value: 1.045651103880564 and parameters: {'learning_rate': 0.009400945131082061, 'max_depth': 8, 'min_child_weight': 17.66047343840021, 'subsample': 0.9713920422846007, 'colsample_bytree': 0.8892753632074072, 'gamma': 4.866253783371921, 'lambda': 0.004611852426900528, 'alpha': 0.03684461480788173}. Best is trial 11 with value: 1.0454830269442963.


Fold 5 RMSE: 1.0455
Mean CV RMSE: 1.0457 ± 0.0010
Training fold 1...
Fold 1 RMSE: 1.0466
Training fold 2...
Fold 2 RMSE: 1.0454
Training fold 3...
Fold 3 RMSE: 1.0441
Training fold 4...
Fold 4 RMSE: 1.0467
Training fold 5...


[I 2024-12-16 23:58:08,759] Trial 13 finished with value: 1.0456640324384179 and parameters: {'learning_rate': 0.011488727419687842, 'max_depth': 8, 'min_child_weight': 20.344596524445297, 'subsample': 0.9849367826553198, 'colsample_bytree': 0.8654303243924336, 'gamma': 4.767219514134811, 'lambda': 0.0020077924453237683, 'alpha': 0.03433674363313339}. Best is trial 11 with value: 1.0454830269442963.


Fold 5 RMSE: 1.0455
Mean CV RMSE: 1.0457 ± 0.0010
Training fold 1...
Fold 1 RMSE: 1.0465
Training fold 2...
Fold 2 RMSE: 1.0454
Training fold 3...
Fold 3 RMSE: 1.0440
Training fold 4...
Fold 4 RMSE: 1.0466
Training fold 5...


[I 2024-12-16 23:59:24,948] Trial 14 finished with value: 1.045547317991709 and parameters: {'learning_rate': 0.011587069462410312, 'max_depth': 9, 'min_child_weight': 39.62468870296583, 'subsample': 0.979599616505807, 'colsample_bytree': 0.849454630061222, 'gamma': 3.8295240444484078, 'lambda': 0.001027665566481729, 'alpha': 0.01732901003671875}. Best is trial 11 with value: 1.0454830269442963.


Fold 5 RMSE: 1.0453
Mean CV RMSE: 1.0455 ± 0.0010
Training fold 1...
Fold 1 RMSE: 1.0468
Training fold 2...
Fold 2 RMSE: 1.0457
Training fold 3...
Fold 3 RMSE: 1.0443
Training fold 4...
Fold 4 RMSE: 1.0468
Training fold 5...


[I 2024-12-17 00:00:05,759] Trial 15 finished with value: 1.0458314066192373 and parameters: {'learning_rate': 0.03194696217941441, 'max_depth': 9, 'min_child_weight': 42.02046319929978, 'subsample': 0.7439418577471766, 'colsample_bytree': 0.8155922713829251, 'gamma': 3.600005995752612, 'lambda': 0.0014881972229552932, 'alpha': 0.0011784748227089113}. Best is trial 11 with value: 1.0454830269442963.


Fold 5 RMSE: 1.0456
Mean CV RMSE: 1.0458 ± 0.0010
Training fold 1...
Fold 1 RMSE: 1.0464
Training fold 2...
Fold 2 RMSE: 1.0453
Training fold 3...
Fold 3 RMSE: 1.0439
Training fold 4...
Fold 4 RMSE: 1.0463
Training fold 5...


[I 2024-12-17 00:05:38,734] Trial 16 finished with value: 1.0454291619134592 and parameters: {'learning_rate': 0.003742750014416757, 'max_depth': 10, 'min_child_weight': 3.627068843726137, 'subsample': 0.7675192241268168, 'colsample_bytree': 0.9613435327497694, 'gamma': 2.707664369008234, 'lambda': 0.49608798503256685, 'alpha': 0.008035116498791017}. Best is trial 16 with value: 1.0454291619134592.


Fold 5 RMSE: 1.0452
Mean CV RMSE: 1.0454 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0467
Training fold 2...
Fold 2 RMSE: 1.0457
Training fold 3...
Fold 3 RMSE: 1.0442
Training fold 4...
Fold 4 RMSE: 1.0466
Training fold 5...
Fold 5 RMSE: 1.0455
Mean CV RMSE: 1.0458 ± 0.0009


[I 2024-12-17 00:18:40,824] Trial 17 finished with value: 1.0457726111314543 and parameters: {'learning_rate': 0.0033720333844194546, 'max_depth': 13, 'min_child_weight': 3.8662364637749365, 'subsample': 0.7174339800218963, 'colsample_bytree': 0.9646283252073747, 'gamma': 2.69806031887569, 'lambda': 7.973455513997151, 'alpha': 0.003650475710082196}. Best is trial 16 with value: 1.0454291619134592.


Training fold 1...
Fold 1 RMSE: 1.0467
Training fold 2...
Fold 2 RMSE: 1.0456
Training fold 3...
Fold 3 RMSE: 1.0443
Training fold 4...
Fold 4 RMSE: 1.0468
Training fold 5...
Fold 5 RMSE: 1.0456
Mean CV RMSE: 1.0458 ± 0.0009


[I 2024-12-17 00:24:42,730] Trial 18 finished with value: 1.0457968961264168 and parameters: {'learning_rate': 0.004505593226052351, 'max_depth': 10, 'min_child_weight': 0.9842447446595114, 'subsample': 0.6423197483492797, 'colsample_bytree': 0.7804837291963207, 'gamma': 2.7826260703713097, 'lambda': 0.5556358324188969, 'alpha': 0.016193731171747397}. Best is trial 16 with value: 1.0454291619134592.


Training fold 1...
Fold 1 RMSE: 1.0464
Training fold 2...
Fold 2 RMSE: 1.0453
Training fold 3...
Fold 3 RMSE: 1.0439
Training fold 4...
Fold 4 RMSE: 1.0464
Training fold 5...
Fold 5 RMSE: 1.0452
Mean CV RMSE: 1.0454 ± 0.0009


[I 2024-12-17 00:32:22,441] Trial 19 finished with value: 1.045434280680779 and parameters: {'learning_rate': 0.002670769300447097, 'max_depth': 10, 'min_child_weight': 7.511147296525409, 'subsample': 0.784507025193432, 'colsample_bytree': 0.9238059460710545, 'gamma': 2.286630477666964, 'lambda': 0.30429681760032806, 'alpha': 0.003413464312779463}. Best is trial 16 with value: 1.0454291619134592.


Training fold 1...
Fold 1 RMSE: 1.0464
Training fold 2...
Fold 2 RMSE: 1.0453
Training fold 3...
Fold 3 RMSE: 1.0439
Training fold 4...
Fold 4 RMSE: 1.0464
Training fold 5...


[I 2024-12-17 00:36:16,085] Trial 20 finished with value: 1.0454636676126154 and parameters: {'learning_rate': 0.005760770635434498, 'max_depth': 10, 'min_child_weight': 6.599922874593271, 'subsample': 0.7502732404046113, 'colsample_bytree': 0.9193583925083266, 'gamma': 2.178470023546367, 'lambda': 0.2875779943678673, 'alpha': 0.09413389736675846}. Best is trial 16 with value: 1.0454291619134592.


Fold 5 RMSE: 1.0453
Mean CV RMSE: 1.0455 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0465
Training fold 2...
Fold 2 RMSE: 1.0456
Training fold 3...
Fold 3 RMSE: 1.0443
Training fold 4...
Fold 4 RMSE: 1.0467
Training fold 5...


[I 2024-12-17 00:37:15,680] Trial 21 finished with value: 1.045756527236213 and parameters: {'learning_rate': 0.0226585809832132, 'max_depth': 10, 'min_child_weight': 0.6845186003223416, 'subsample': 0.6161093577970366, 'colsample_bytree': 0.9936815577179293, 'gamma': 0.8483493067149817, 'lambda': 1.378295129564904, 'alpha': 0.0023956030357665057}. Best is trial 16 with value: 1.0454291619134592.


Fold 5 RMSE: 1.0456
Mean CV RMSE: 1.0458 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0463
Training fold 2...
Fold 2 RMSE: 1.0453
Training fold 3...
Fold 3 RMSE: 1.0439
Training fold 4...
Fold 4 RMSE: 1.0464
Training fold 5...


[I 2024-12-17 00:42:28,424] Trial 22 finished with value: 1.0454026242735197 and parameters: {'learning_rate': 0.002920099556583298, 'max_depth': 9, 'min_child_weight': 9.195292520612849, 'subsample': 0.7934373349754068, 'colsample_bytree': 0.9335201324755311, 'gamma': 3.1120597386444615, 'lambda': 0.3025929088404199, 'alpha': 0.016417775580559877}. Best is trial 22 with value: 1.0454026242735197.


Fold 5 RMSE: 1.0452
Mean CV RMSE: 1.0454 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0463
Training fold 2...
Fold 2 RMSE: 1.0452
Training fold 3...
Fold 3 RMSE: 1.0438
Training fold 4...
Fold 4 RMSE: 1.0464
Training fold 5...


[I 2024-12-17 00:48:44,180] Trial 23 finished with value: 1.0453837144565397 and parameters: {'learning_rate': 0.0026639994606664002, 'max_depth': 9, 'min_child_weight': 8.032436910591848, 'subsample': 0.7961012143398528, 'colsample_bytree': 0.9438323934559976, 'gamma': 2.988217950738047, 'lambda': 0.1715486590901371, 'alpha': 0.01517695217402667}. Best is trial 23 with value: 1.0453837144565397.


Fold 5 RMSE: 1.0452
Mean CV RMSE: 1.0454 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0464
Training fold 2...
Fold 2 RMSE: 1.0453
Training fold 3...
Fold 3 RMSE: 1.0439
Training fold 4...
Fold 4 RMSE: 1.0464
Training fold 5...


[I 2024-12-17 00:51:22,132] Trial 24 finished with value: 1.0454402057828902 and parameters: {'learning_rate': 0.007102572897917363, 'max_depth': 9, 'min_child_weight': 2.6304597620591, 'subsample': 0.7903204017210748, 'colsample_bytree': 0.9314098699649346, 'gamma': 2.976680251558798, 'lambda': 0.07104383848777393, 'alpha': 0.0167600188290207}. Best is trial 23 with value: 1.0453837144565397.


Fold 5 RMSE: 1.0452
Mean CV RMSE: 1.0454 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0463
Training fold 2...
Fold 2 RMSE: 1.0452
Training fold 3...
Fold 3 RMSE: 1.0438
Training fold 4...
Fold 4 RMSE: 1.0463
Training fold 5...


[I 2024-12-17 00:57:32,680] Trial 25 finished with value: 1.0453613381313727 and parameters: {'learning_rate': 0.0024562693815393744, 'max_depth': 9, 'min_child_weight': 11.687439486531844, 'subsample': 0.9199126034731623, 'colsample_bytree': 0.9972729656937434, 'gamma': 3.2628018782035406, 'lambda': 0.1606950464092559, 'alpha': 0.03388437535861661}. Best is trial 25 with value: 1.0453613381313727.


Fold 5 RMSE: 1.0452
Mean CV RMSE: 1.0454 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0463
Training fold 2...
Fold 2 RMSE: 1.0452
Training fold 3...
Fold 3 RMSE: 1.0438
Training fold 4...
Fold 4 RMSE: 1.0463
Training fold 5...
Fold 5 RMSE: 1.0452
Mean CV RMSE: 1.0454 ± 0.0009


[I 2024-12-17 01:09:37,654] Trial 26 finished with value: 1.045368086644693 and parameters: {'learning_rate': 0.0011825877045558346, 'max_depth': 9, 'min_child_weight': 12.165192471966876, 'subsample': 0.9163310373731692, 'colsample_bytree': 0.9895889688089055, 'gamma': 3.265546893215477, 'lambda': 0.1379611338359896, 'alpha': 0.05028347705822193}. Best is trial 25 with value: 1.0453613381313727.


Training fold 1...
Fold 1 RMSE: 1.0464
Training fold 2...
Fold 2 RMSE: 1.0453
Training fold 3...
Fold 3 RMSE: 1.0439
Training fold 4...
Fold 4 RMSE: 1.0465
Training fold 5...


[I 2024-12-17 01:19:40,259] Trial 27 finished with value: 1.045505924140646 and parameters: {'learning_rate': 0.001346189554969161, 'max_depth': 8, 'min_child_weight': 14.079991975158913, 'subsample': 0.8914483485732925, 'colsample_bytree': 0.994980376640635, 'gamma': 3.3135769941675144, 'lambda': 0.02690087624762238, 'alpha': 0.05488379908771929}. Best is trial 25 with value: 1.0453613381313727.


Fold 5 RMSE: 1.0453
Mean CV RMSE: 1.0455 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0463
Training fold 2...
Fold 2 RMSE: 1.0453
Training fold 3...
Fold 3 RMSE: 1.0438
Training fold 4...
Fold 4 RMSE: 1.0464
Training fold 5...


[I 2024-12-17 01:25:24,249] Trial 28 finished with value: 1.0453962273699466 and parameters: {'learning_rate': 0.0022863349106052038, 'max_depth': 9, 'min_child_weight': 0.43463274233090166, 'subsample': 0.9098653288486309, 'colsample_bytree': 0.9996437753505708, 'gamma': 4.133726658272436, 'lambda': 0.1412659563265008, 'alpha': 0.2656050681067541}. Best is trial 25 with value: 1.0453613381313727.


Fold 5 RMSE: 1.0452
Mean CV RMSE: 1.0454 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0464
Training fold 2...
Fold 2 RMSE: 1.0453
Training fold 3...
Fold 3 RMSE: 1.0438
Training fold 4...
Fold 4 RMSE: 1.0464
Training fold 5...
Fold 5 RMSE: 1.0452
Mean CV RMSE: 1.0454 ± 0.0009


[I 2024-12-17 01:38:40,969] Trial 29 finished with value: 1.045418074348428 and parameters: {'learning_rate': 0.0010232807471275242, 'max_depth': 9, 'min_child_weight': 0.1591819650943226, 'subsample': 0.9296260895939548, 'colsample_bytree': 0.9015494001092689, 'gamma': 3.993464718652871, 'lambda': 0.00879320783222039, 'alpha': 0.05277342879843005}. Best is trial 25 with value: 1.0453613381313727.


Training fold 1...
Fold 1 RMSE: 1.0478
Training fold 2...
Fold 2 RMSE: 1.0467
Training fold 3...
Fold 3 RMSE: 1.0456
Training fold 4...
Fold 4 RMSE: 1.0483
Training fold 5...


[I 2024-12-17 01:38:53,352] Trial 30 finished with value: 1.0470374574626442 and parameters: {'learning_rate': 0.2942135021462481, 'max_depth': 8, 'min_child_weight': 2.034781599895151, 'subsample': 0.9198600052756488, 'colsample_bytree': 0.7406001424084412, 'gamma': 2.3418542933589093, 'lambda': 0.07406659280344369, 'alpha': 0.25880665101457284}. Best is trial 25 with value: 1.0453613381313727.


Fold 5 RMSE: 1.0467
Mean CV RMSE: 1.0470 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0482
Training fold 2...
Fold 2 RMSE: 1.0471
Training fold 3...
Fold 3 RMSE: 1.0457
Training fold 4...
Fold 4 RMSE: 1.0479
Training fold 5...


[I 2024-12-17 01:42:51,370] Trial 31 finished with value: 1.0471479797346994 and parameters: {'learning_rate': 0.017133351125432572, 'max_depth': 13, 'min_child_weight': 1.300463398545396, 'subsample': 0.8691673109233743, 'colsample_bytree': 0.6477295330709955, 'gamma': 3.2081274045953285, 'lambda': 0.046141972414924445, 'alpha': 0.1359161956147205}. Best is trial 25 with value: 1.0453613381313727.


Fold 5 RMSE: 1.0468
Mean CV RMSE: 1.0471 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0473
Training fold 2...
Fold 2 RMSE: 1.0460
Training fold 3...
Fold 3 RMSE: 1.0446
Training fold 4...
Fold 4 RMSE: 1.0473
Training fold 5...


[I 2024-12-17 01:43:27,736] Trial 32 finished with value: 1.04618162065431 and parameters: {'learning_rate': 0.050804251886536644, 'max_depth': 10, 'min_child_weight': 0.02523664236029901, 'subsample': 0.9568364279751503, 'colsample_bytree': 0.7861166081339875, 'gamma': 1.9364881418295579, 'lambda': 0.046339229900298644, 'alpha': 0.03766959204220514}. Best is trial 25 with value: 1.0453613381313727.


Fold 5 RMSE: 1.0458
Mean CV RMSE: 1.0462 ± 0.0010
Training fold 1...
Fold 1 RMSE: 1.0463
Training fold 2...
Fold 2 RMSE: 1.0453
Training fold 3...
Fold 3 RMSE: 1.0438
Training fold 4...
Fold 4 RMSE: 1.0464
Training fold 5...


[I 2024-12-17 01:48:52,103] Trial 33 finished with value: 1.045387617912112 and parameters: {'learning_rate': 0.0023466531509615256, 'max_depth': 9, 'min_child_weight': 17.426046412650564, 'subsample': 0.931003831910785, 'colsample_bytree': 0.9975335503914388, 'gamma': 4.187565610828517, 'lambda': 0.15671273620587126, 'alpha': 0.14833231619729312}. Best is trial 25 with value: 1.0453613381313727.


Fold 5 RMSE: 1.0452
Mean CV RMSE: 1.0454 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0463
Training fold 2...
Fold 2 RMSE: 1.0452
Training fold 3...
Fold 3 RMSE: 1.0438
Training fold 4...
Fold 4 RMSE: 1.0464
Training fold 5...


[I 2024-12-17 01:55:27,779] Trial 34 finished with value: 1.0453818789183145 and parameters: {'learning_rate': 0.0021978412286356554, 'max_depth': 9, 'min_child_weight': 13.769796614512282, 'subsample': 0.8201686655552866, 'colsample_bytree': 0.9517316594253322, 'gamma': 4.196887300383792, 'lambda': 0.1770413282973609, 'alpha': 0.11609821800436655}. Best is trial 25 with value: 1.0453613381313727.


Fold 5 RMSE: 1.0452
Mean CV RMSE: 1.0454 ± 0.0009
Training fold 1...
Fold 1 RMSE: 1.0465
Training fold 2...
Fold 2 RMSE: 1.0453
Training fold 3...
Fold 3 RMSE: 1.0440
Training fold 4...
Fold 4 RMSE: 1.0466
Training fold 5...


[W 2024-12-17 02:03:58,323] Trial 35 failed with parameters: {'learning_rate': 0.0014055817540153878, 'max_depth': 8, 'min_child_weight': 9.001701045150105, 'subsample': 0.8123380792736161, 'colsample_bytree': 0.8764072067850127, 'gamma': 3.55474318759224, 'lambda': 0.21305813081580105, 'alpha': 0.6444099583891106} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\Axyom\anaconda3\envs\kaggle_env\lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\Axyom\AppData\Local\Temp\ipykernel_10324\762608703.py", line 45, in objective
    results = train_model_cv(\
  File "c:\Users\Axyom\Kaggle\PG S4e12 - Regression Insurance\axyom_utilities\training.py", line 32, in train_model_cv
    model.fit(
  File "c:\Users\Axyom\Kaggle\PG S4e12 - Regression Insurance\axyom_utilities\wrappers.py", line 41, in fit
    self.xgb_model_.fit(
  File "c:\Users\Axyom\anaconda3\envs\kaggle_env\

KeyboardInterrupt: 

In [None]:
xgb_best_params

In [None]:
xgb_best_params["n_estimators"] = 10000

# Model Training

In [None]:
# Initialize a CatBoost Regressor
model = XGBRegressorWrapper(**xgb_best_params)

# Use the train_model function to train and evaluate the model
results = train_model_cv(
    model=model,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    X_orig=X_orig,
    y_orig=y_orig,
    early_stopping_rounds=100,
    cv_splits=7
)

In [None]:
mean_score = np.mean(results["cv_scores"])
std_score = np.std(results["cv_scores"])

# Prepare the data
data = {
    "mean_score": mean_score,
    "std_score": std_score
}

# Save to a JSON file
with open("score.json", "w") as json_file:
    json.dump(data, json_file, indent=4)  # Use `indent` for readability

In [None]:
import joblib

# Save OOF predictions as a CSV file
oof_preds_df = pd.DataFrame({"oof_preds": results["oof_preds"]})
oof_preds_df.to_csv("oof_preds", index=False)
print("OOF predictions saved to oof_preds.csv.")

test_preds_df = pd.DataFrame({"test_preds": results["test_preds"]})
test_preds_df.to_csv("test_preds", index=False)
print("Test predictions saved to test_preds.csv.")

joblib.dump(results["models"], "xgboost_models.pkl")

# Submission

In [None]:
y_pred = np.expm1(results['test_preds'])

submission = pd.DataFrame({
    'id': X_test.index,  
    'Premium Amount': y_pred
})

submission.to_csv('submission.csv', index=False)

In [None]:
# import os
# from IPython.display import display, Javascript

# # Define your message and file paths
# COMP_NAME = "playground-series-s4e12"
# FILE_PATH = "submission.csv"

# SUBMIT_MESSAGE = f"Clean XGB: Mean score: {mean_score:.4f} +/- {std_score:.4f}"

# # Submit to Kaggle
# if SUBMIT_TO_KAGGLE: 
#     os.system(f'kaggle competitions submit -c {COMP_NAME} -f {FILE_PATH} -m "{SUBMIT_MESSAGE}"')

# # Git commit and push
# GIT_COMMIT_MESSAGE = f"Submission: {SUBMIT_MESSAGE}"

# # save notebook
# # display(Javascript('IPython.notebook.save_checkpoint()'))

# # Commands for Git
# os.system("git add .")  # Stage all changes (adjust if you only want specific files)
# os.system(f'git commit -m "{GIT_COMMIT_MESSAGE}"')  # Commit changes with a message
# os.system("git push origin main")  # Push to the main branch (change branch if needed)