# 1. Model training

In [1]:
import numpy as np
import pandas as pd
import warnings
import os
import pickle
import optuna

import sklearn
import xgboost as xgb
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestRegressor
from feature_engine.selection import SelectBySingleFeaturePerformance
from feature_engine.datetime import DatetimeFeatures
from feature_engine.outliers import Winsorizer
from sklearn.preprocessing import MinMaxScaler, PowerTransformer, FunctionTransformer, StandardScaler
from sklearn.compose import ColumnTransformer
from feature_engine.encoding import RareLabelEncoder, MeanEncoder, CountFrequencyEncoder
from sklearn.pipeline import FeatureUnion
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.metrics import r2_score

# Display settings

In [2]:
pd.set_option("display.max_columns", None)

In [3]:
sklearn.set_config(transform_output='pandas')

In [4]:
warnings.filterwarnings("ignore")

# 2. Read the data

In [5]:
file_path = r"C:\Users\User\OneDrive\Desktop\Flight-Price-Prediction\Data"

In [6]:
train = pd.read_csv(f"{file_path}\\train.csv")

In [7]:
train

Unnamed: 0,airline,date_of_journey,source,destination,dep_time,arrival_time,duration,total_stops,additional_info,price
0,Jet Airways,2019-03-21,Banglore,New Delhi,08:55:00,19:10:00,615,1.0,In-flight meal not included,7832
1,Jet Airways,2019-03-27,Delhi,Cochin,17:30:00,04:25:00,655,1.0,In-flight meal not included,6540
2,Goair,2019-03-09,Banglore,New Delhi,11:40:00,14:35:00,175,0.0,No Info,7305
3,Air India,2019-06-12,Kolkata,Banglore,09:25:00,18:30:00,545,1.0,No Info,8366
4,Jet Airways,2019-03-12,Banglore,New Delhi,22:55:00,07:40:00,525,1.0,In-flight meal not included,11087
...,...,...,...,...,...,...,...,...,...,...
6690,Jet Airways,2019-03-21,Delhi,Cochin,10:45:00,18:50:00,1925,2.0,No Info,11093
6691,Air India,2019-05-01,Kolkata,Banglore,09:25:00,18:30:00,545,1.0,No Info,8891
6692,Jet Airways,2019-06-01,Delhi,Cochin,14:00:00,19:00:00,300,1.0,In-flight meal not included,10262
6693,Air Asia,2019-06-24,Delhi,Cochin,07:55:00,13:25:00,330,1.0,No Info,6152


In [8]:
val = pd.read_csv(f"{file_path}\\val.csv")

In [9]:
val

Unnamed: 0,airline,date_of_journey,source,destination,dep_time,arrival_time,duration,total_stops,additional_info,price
0,Indigo,2019-06-24,Delhi,Cochin,20:25:00,01:30:00,305,1.0,No Info,5054
1,Multiple Carriers,2019-06-12,Delhi,Cochin,09:45:00,22:30:00,765,1.0,No Info,9646
2,Jet Airways,2019-03-12,Banglore,New Delhi,22:55:00,15:15:00,980,1.0,In-flight meal not included,11087
3,Multiple Carriers,2019-06-06,Delhi,Cochin,13:00:00,21:00:00,480,1.0,No Info,13587
4,Jet Airways,2019-05-18,Delhi,Cochin,23:05:00,04:25:00,1760,2.0,No Info,16704
...,...,...,...,...,...,...,...,...,...,...
1669,Spicejet,2019-05-01,Chennai,Kolkata,09:45:00,12:00:00,135,0.0,No Info,3597
1670,Indigo,2019-05-01,Kolkata,Banglore,08:10:00,13:00:00,290,1.0,No Info,5069
1671,Jet Airways,2019-05-27,Delhi,Cochin,05:30:00,12:35:00,425,2.0,In-flight meal not included,15544
1672,Jet Airways,2019-06-12,Mumbai,Hyderabad,19:35:00,21:05:00,90,0.0,In-flight meal not included,3210


In [10]:
test = pd.read_csv(f"{file_path}\\test.csv")

In [11]:
test

Unnamed: 0,airline,date_of_journey,source,destination,dep_time,arrival_time,duration,total_stops,additional_info,price
0,Jet Airways,2019-03-06,Banglore,New Delhi,08:00:00,08:15:00,1455,1.0,No Info,17996
1,Spicejet,2019-06-06,Kolkata,Banglore,22:20:00,00:40:00,140,0.0,No Info,3873
2,Indigo,2019-03-18,Kolkata,Banglore,05:30:00,08:20:00,170,0.0,No Info,4462
3,Indigo,2019-06-27,Chennai,Kolkata,19:35:00,21:55:00,140,0.0,No Info,3597
4,Indigo,2019-05-06,Kolkata,Banglore,15:15:00,17:45:00,150,0.0,No Info,4804
...,...,...,...,...,...,...,...,...,...,...
2088,Jet Airways,2019-05-27,Delhi,Cochin,19:15:00,12:35:00,1040,1.0,In-flight meal not included,12898
2089,Multiple Carriers,2019-06-27,Delhi,Cochin,11:25:00,19:15:00,470,1.0,No Info,7155
2090,Jet Airways,2019-06-03,Delhi,Cochin,02:15:00,04:25:00,1570,1.0,In-flight meal not included,11627
2091,Multiple Carriers,2019-06-06,Delhi,Cochin,15:15:00,01:30:00,615,1.0,No Info,6795


# 3. Preprocessing operations

In [12]:
air_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy='most_frequent')),
    ("grouper", RareLabelEncoder(tol=0.1, n_categories=2, replace_with="Other")),
    ("encoder", OneHotEncoder(sparse_output=False, handle_unknown="ignore"))
])

In [13]:
feature_to_extract = ["month", "week", "day_of_week", "day_of_month", "weekend", "year_start", "year_end"]

In [14]:
doj_transformer = Pipeline(steps=[
    ("dt", DatetimeFeatures(features_to_extract=feature_to_extract, yearfirst=True, format="mixed")),
    ("scaler", MinMaxScaler())
])

In [15]:
location_pipe1 = Pipeline(steps=[
    ("grouper", RareLabelEncoder(tol=0.1, replace_with="other", n_categories=2)),
    ("encoder", MeanEncoder()),
    ("scaler", PowerTransformer())
])

In [16]:
def is_north(X):
    columns = X.columns.to_list()
    north_cities = ["Delhi", "Mumbai", "New Delhi"]
    
    return (
        X
        .assign(**{
            f"{col}_is_north" : X.loc[:, col].isin(north_cities).astype(int)
            for col in columns
        })
        .drop(columns=columns)
    )

In [17]:
loc_transformer = Pipeline(steps=[
    ("part1", location_pipe1),
    ("part2", FunctionTransformer(func=is_north))
])

In [18]:
time_pipe1 = Pipeline(steps=[
    ("dt", DatetimeFeatures(features_to_extract=["hour", "minute"])),
    ("scaler", MinMaxScaler())
])

In [19]:
def part_of_day(X, morning=4, afternoon=12, evening=16, night=20):
    columns = X.columns.to_list()
    
    X_temp = X.assign(**{
        col : pd.to_datetime(X.loc[:, col]).dt.hour
        for col in columns
    })
    
    return (
        X_temp
        .assign(**{
            f"{col}_part_of_day" : np.select(
                [X_temp.loc[:, col].between(morning, afternoon, inclusive="left"),
                X_temp.loc[:, col].between(afternoon, evening, inclusive="left"),
                X_temp.loc[:, col].between(evening, night, inclusive="left")],
                ["morning", "afternoon", "evening"],
                default="night"
            )
            for col in columns
        })
        .drop(columns=columns)
    )

In [20]:
time_pipe2 = Pipeline(steps=[
    ("funcT", FunctionTransformer(func=part_of_day)),
    ("encoder", CountFrequencyEncoder()),
    ("scaler", MinMaxScaler())
])

In [21]:
time_transformer = FeatureUnion(transformer_list=[
    ("part1", time_pipe1),
    ("part2", time_pipe2)
])

In [22]:
class RBFPercentileSimilarity(BaseEstimator, TransformerMixin):
    def __init__(self, variables=None, percentiles=[0.25, 0.5, 0.75], gamma=0.1):
        self.variables = variables
        self.percentiles = percentiles
        self.gamma = gamma
        
    def fit(self, X, y=None):
        X = pd.DataFrame(X)
        if not self.variables:
            self.variables = X.select_dtypes(include="number").columns.to_list()
            
        self.reference_values_ = {
            col : (
                X
                .loc[:, col]
                .quantile(self.percentiles)
                .values
                .reshape(-1, 1)
            )
            for col in self.variables
        }
    
        return self

    def transform(self, X):
        X = pd.DataFrame(X)
        objects = []
        for col in self.variables:
            columns = [f"{col}_rbf_{int(percentile*100)}" for percentile in self.percentiles]
            obj = pd.DataFrame(
                data = rbf_kernel(X.loc[:, [col]], Y = self.reference_values_[col] , gamma = self.gamma),
                columns=columns
            )
                
            objects.append(obj)
                
        return pd.concat(objects, axis=1)

In [23]:
def classify_duration(X, short=180, medium=400):
    return (
        X
        .assign(duration_modified = np.select(
                [X.duration.lt(short),
                X.duration.between(short, medium, inclusive="left")],
                ["short", "medium"],
                default="long"
            ))
        .drop(columns="duration")
    )

In [24]:
def is_over(X, value=1000):
    return (
        X
        .assign(**{
            f"duration_over{value}":X.duration.gt(value).astype(int)
        })
        .drop(columns="duration")
    )

In [25]:
duration_pipe1 = Pipeline(steps=[
    ("rbf", RBFPercentileSimilarity()),
    ("scaler", PowerTransformer())
])

duration_pipe2 = Pipeline(steps=[
    ("cat", FunctionTransformer(func=classify_duration)),
    ("encoder", OrdinalEncoder(categories=[["short", "medium", "long"]]))
])

duration_union = FeatureUnion(transformer_list=[
    ("part1", duration_pipe1),
    ("part2", duration_pipe2),
    ("part3", FunctionTransformer(func=is_over)),
    ("part4", StandardScaler())
]) 

duration_transformer = Pipeline(steps=[
    ("outliers", Winsorizer(capping_method='iqr', fold=1.5)),
    ("imputer", SimpleImputer(strategy="median")),
    ("union", duration_union)
])

In [26]:
def is_direct(X):
    return X.assign(is_direct_flight=X.total_stops.eq(0).astype(int))

In [27]:
total_stops_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("", FunctionTransformer(func=is_direct))
])

In [28]:
info_pipe1 = Pipeline(steps=[
    ("grouper", RareLabelEncoder(tol=0.1, n_categories=2, replace_with="Other")),
    ("encoder", OneHotEncoder(handle_unknown="ignore", sparse_output=False))
])

In [29]:
def have_info(X):
    return X.assign(additional_info=X.additional_info.ne("No info").astype(int))

In [30]:
info_union = FeatureUnion(transformer_list=[
    ("part1", info_pipe1),
    ("part2", FunctionTransformer(func=have_info))
])

In [31]:
info_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="constant", fill_value="unknown")),
    ("union", info_union)
])

In [32]:
col_transformer = ColumnTransformer(transformers=[
    ("t1", air_transformer, ["airline"]),
    ("t2", doj_transformer, ["date_of_journey"]),
    ("t3", loc_transformer, ["source", "destination"]),
    ("t4", time_transformer, ["arrival_time", "dep_time"]),
    ("t5", duration_transformer, ["duration"]),
    ("t6", total_stops_transformer, ["total_stops"]),
    ("t7", info_transformer, ["additional_info"])
], remainder="passthrough")

In [33]:
estimator = RandomForestRegressor(n_estimators=10, max_depth=3, random_state=42)

selector = SelectBySingleFeaturePerformance(
    estimator=estimator,
    scoring="r2",
    threshold=0.1
)

In [34]:
preprocessor = Pipeline(steps=[
    ("ct", col_transformer),
    ("selector", selector)
])

In [35]:
preprocessor.fit(train.drop(columns="price"),
                train.price.copy())

In [36]:
train_transformed = preprocessor.transform(train.drop(columns="price"))

In [37]:
train_transformed

Unnamed: 0,t1__airline_Indigo,t1__airline_Jet Airways,t1__airline_Other,t2__date_of_journey_week,t5__duration_rbf_25,t5__duration_modified,t5__duration_over1000,t5__duration,t6__total_stops,t6__is_direct_flight
0,0.0,1.0,0.0,0.176471,-0.364262,2.0,0,-0.033916,1.0,0
1,0.0,1.0,0.0,0.235294,-0.364262,2.0,0,0.046422,1.0,0
2,0.0,0.0,1.0,0.058824,2.373008,0.0,0,-0.917631,0.0,1
3,0.0,0.0,0.0,0.882353,-0.364262,2.0,0,-0.174507,1.0,0
4,0.0,1.0,0.0,0.117647,-0.364262,2.0,0,-0.214676,1.0,0
...,...,...,...,...,...,...,...,...,...,...
6690,0.0,1.0,0.0,0.176471,-0.364262,2.0,1,2.597145,2.0,0
6691,0.0,0.0,0.0,0.529412,-0.364262,2.0,0,-0.174507,1.0,0
6692,0.0,1.0,0.0,0.764706,-0.364262,1.0,0,-0.666576,1.0,0
6693,0.0,0.0,1.0,1.000000,-0.364262,1.0,0,-0.606322,1.0,0


In [38]:
val_transformed = preprocessor.transform(val.drop(columns="price"))

In [39]:
val_transformed

Unnamed: 0,t1__airline_Indigo,t1__airline_Jet Airways,t1__airline_Other,t2__date_of_journey_week,t5__duration_rbf_25,t5__duration_modified,t5__duration_over1000,t5__duration,t6__total_stops,t6__is_direct_flight
0,1.0,0.0,0.0,1.000000,-0.364262,1.0,0,-0.656533,1.0,0
1,0.0,0.0,0.0,0.882353,-0.364262,2.0,0,0.267351,1.0,0
2,0.0,1.0,0.0,0.117647,-0.364262,2.0,0,0.699166,1.0,0
3,0.0,0.0,0.0,0.823529,-0.364262,2.0,0,-0.305056,1.0,0
4,0.0,1.0,0.0,0.647059,-0.364262,2.0,1,2.265752,2.0,0
...,...,...,...,...,...,...,...,...,...,...
1669,0.0,0.0,1.0,0.529412,-0.364262,0.0,0,-0.997969,0.0,1
1670,1.0,0.0,0.0,0.529412,-0.364262,1.0,0,-0.686660,1.0,0
1671,0.0,1.0,0.0,0.764706,-0.364262,2.0,0,-0.415520,2.0,0
1672,0.0,1.0,0.0,0.882353,-0.364262,0.0,0,-1.088349,0.0,1


In [40]:
test_transformed = preprocessor.transform(test.drop(columns="price"))

In [41]:
test_transformed

Unnamed: 0,t1__airline_Indigo,t1__airline_Jet Airways,t1__airline_Other,t2__date_of_journey_week,t5__duration_rbf_25,t5__duration_modified,t5__duration_over1000,t5__duration,t6__total_stops,t6__is_direct_flight
0,0.0,1.0,0.0,0.058824,-0.364262,2.0,1,1.653176,1.0,0
1,0.0,0.0,1.0,0.823529,-0.364262,0.0,0,-0.987926,0.0,1
2,1.0,0.0,0.0,0.176471,3.112489,0.0,0,-0.927673,0.0,1
3,1.0,0.0,0.0,1.000000,-0.364262,0.0,0,-0.987926,0.0,1
4,1.0,0.0,0.0,0.588235,-0.364262,0.0,0,-0.967842,0.0,1
...,...,...,...,...,...,...,...,...,...,...
2088,0.0,1.0,0.0,0.764706,-0.364262,2.0,1,0.819672,1.0,0
2089,0.0,0.0,0.0,1.000000,-0.364262,2.0,0,-0.325140,1.0,0
2090,0.0,1.0,0.0,0.823529,-0.364262,2.0,1,1.884147,1.0,0
2091,0.0,0.0,0.0,0.823529,-0.364262,2.0,0,-0.033916,1.0,0


# 4. Training the model

In [42]:
def objective(trial):
    dtrain = xgb.DMatrix(train_transformed, label=train["price"])
    dval = xgb.DMatrix(val_transformed, label=val["price"])
    
    param = {
        'objective': 'reg:squarederror',
        'eta': trial.suggest_float('eta', 0.05, 0.2),
        'max_depth': trial.suggest_int('max_depth', 3, 5),
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'alpha': trial.suggest_float('alpha', 0, 1)
    }
    
    flight = xgb.train(param, dtrain, 
                   num_boost_round=100, evals=[(dval, 'validation')], 
                   early_stopping_rounds=10, 
                   maximize=True)
    
    # Predict on the validation set
    preds = flight.predict(dval)
    r2 = r2_score(val["price"], preds)
    
    return r2

In [43]:
# Create a study and optimize the objective function
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[I 2024-06-17 19:54:57,809] A new study created in memory with name: no-name-290be5b0-d0b2-46e3-bf58-3fcc6f639276


[0]	validation-rmse:4443.45183
[1]	validation-rmse:4208.99773
[2]	validation-rmse:3928.20282
[3]	validation-rmse:3714.90409
[4]	validation-rmse:3519.39349
[5]	validation-rmse:3340.24398
[6]	validation-rmse:3200.67875
[7]	validation-rmse:3103.16018
[8]	validation-rmse:3026.95378
[9]	validation-rmse:2955.81746


[I 2024-06-17 19:54:57,947] Trial 0 finished with value: 0.6210654377937317 and parameters: {'eta': 0.11869271789346093, 'max_depth': 5, 'alpha': 0.15100855323559936}. Best is trial 0 with value: 0.6210654377937317.


[0]	validation-rmse:4462.58231
[1]	validation-rmse:4247.50798
[2]	validation-rmse:4021.77440
[3]	validation-rmse:3836.85117
[4]	validation-rmse:3683.84019
[5]	validation-rmse:3559.86158
[6]	validation-rmse:3453.79250
[7]	validation-rmse:3361.75251
[8]	validation-rmse:3286.91893
[9]	validation-rmse:3223.06447


[I 2024-06-17 19:54:58,067] Trial 1 finished with value: 0.5495844483375549 and parameters: {'eta': 0.1176441131331355, 'max_depth': 3, 'alpha': 0.9203525308304396}. Best is trial 0 with value: 0.6210654377937317.


[0]	validation-rmse:4329.54172
[1]	validation-rmse:4030.09142
[2]	validation-rmse:3700.87258
[3]	validation-rmse:3466.58973
[4]	validation-rmse:3276.50091
[5]	validation-rmse:3133.66449
[6]	validation-rmse:3030.37106
[7]	validation-rmse:2953.39317
[8]	validation-rmse:2900.70432
[9]	validation-rmse:2835.50531
[10]	validation-rmse:2804.68380


[I 2024-06-17 19:54:58,153] Trial 2 finished with value: 0.6466708183288574 and parameters: {'eta': 0.17392413046026245, 'max_depth': 4, 'alpha': 0.49074959004846985}. Best is trial 2 with value: 0.6466708183288574.


[0]	validation-rmse:4296.35377
[1]	validation-rmse:3990.68430
[2]	validation-rmse:3686.23325
[3]	validation-rmse:3461.65054
[4]	validation-rmse:3305.62982
[5]	validation-rmse:3181.44517
[6]	validation-rmse:3082.75061
[7]	validation-rmse:3011.43315
[8]	validation-rmse:2965.69669
[9]	validation-rmse:2916.87584


[I 2024-06-17 19:54:58,269] Trial 3 finished with value: 0.6262945532798767 and parameters: {'eta': 0.19850926167024407, 'max_depth': 3, 'alpha': 0.11593180166431749}. Best is trial 2 with value: 0.6466708183288574.


[0]	validation-rmse:4514.45622
[1]	validation-rmse:4331.46179
[2]	validation-rmse:4107.98748
[3]	validation-rmse:3932.00828
[4]	validation-rmse:3760.16455
[5]	validation-rmse:3596.53430
[6]	validation-rmse:3460.85774
[7]	validation-rmse:3357.19231
[8]	validation-rmse:3278.91344
[9]	validation-rmse:3191.61044
[10]	validation-rmse:3125.57619


[I 2024-06-17 19:54:58,394] Trial 4 finished with value: 0.5611947178840637 and parameters: {'eta': 0.08748872281710492, 'max_depth': 5, 'alpha': 0.22838111685849327}. Best is trial 2 with value: 0.6466708183288574.


[0]	validation-rmse:4545.97424
[1]	validation-rmse:4389.02028
[2]	validation-rmse:4223.15374
[3]	validation-rmse:4077.69007
[4]	validation-rmse:3947.30562
[5]	validation-rmse:3833.10078
[6]	validation-rmse:3729.82466
[7]	validation-rmse:3633.49243
[8]	validation-rmse:3551.95030
[9]	validation-rmse:3478.98717
[10]	validation-rmse:3415.73227


[I 2024-06-17 19:54:58,493] Trial 5 finished with value: 0.4759420156478882 and parameters: {'eta': 0.0786849747595064, 'max_depth': 3, 'alpha': 0.2644904340045251}. Best is trial 2 with value: 0.6466708183288574.


[0]	validation-rmse:4578.79064
[1]	validation-rmse:4447.13555
[2]	validation-rmse:4284.58411
[3]	validation-rmse:4151.25078
[4]	validation-rmse:4015.40941
[5]	validation-rmse:3881.59019
[6]	validation-rmse:3764.17253
[7]	validation-rmse:3669.96684
[8]	validation-rmse:3589.48479
[9]	validation-rmse:3502.23046
[10]	validation-rmse:3432.21635


[I 2024-06-17 19:54:58,651] Trial 6 finished with value: 0.4708717465400696 and parameters: {'eta': 0.05970421688459476, 'max_depth': 5, 'alpha': 0.8090273429988121}. Best is trial 2 with value: 0.6466708183288574.


[0]	validation-rmse:4333.18138
[1]	validation-rmse:4035.57031
[2]	validation-rmse:3708.13895
[3]	validation-rmse:3474.40167
[4]	validation-rmse:3284.10611
[5]	validation-rmse:3140.64031
[6]	validation-rmse:3035.92772
[7]	validation-rmse:2958.87607
[8]	validation-rmse:2905.67018
[9]	validation-rmse:2839.76573


[I 2024-06-17 19:54:58,747] Trial 7 finished with value: 0.6456525325775146 and parameters: {'eta': 0.172212821112505, 'max_depth': 4, 'alpha': 0.16754691503605823}. Best is trial 2 with value: 0.6466708183288574.


[0]	validation-rmse:4473.52803
[1]	validation-rmse:4260.04294
[2]	validation-rmse:4004.39164
[3]	validation-rmse:3806.50974
[4]	validation-rmse:3618.70645
[5]	validation-rmse:3443.64249
[6]	validation-rmse:3303.69439
[7]	validation-rmse:3201.80754
[8]	validation-rmse:3127.31535
[9]	validation-rmse:3052.30315
[10]	validation-rmse:2994.17960


[I 2024-06-17 19:54:58,850] Trial 8 finished with value: 0.5973132252693176 and parameters: {'eta': 0.10540266543341, 'max_depth': 5, 'alpha': 0.43924361806273005}. Best is trial 2 with value: 0.6466708183288574.


[0]	validation-rmse:4307.21600
[1]	validation-rmse:3996.89576
[2]	validation-rmse:3657.17176
[3]	validation-rmse:3420.05660
[4]	validation-rmse:3231.73740
[5]	validation-rmse:3093.10613
[6]	validation-rmse:2994.83497
[7]	validation-rmse:2921.37486
[8]	validation-rmse:2868.07994
[9]	validation-rmse:2822.34281
[10]	validation-rmse:2797.02094


[I 2024-06-17 19:54:58,934] Trial 9 finished with value: 0.6485989093780518 and parameters: {'eta': 0.1844672020799113, 'max_depth': 4, 'alpha': 0.06862641865607055}. Best is trial 9 with value: 0.6485989093780518.


[0]	validation-rmse:4376.07092
[1]	validation-rmse:4101.53839
[2]	validation-rmse:3796.79365
[3]	validation-rmse:3571.35994
[4]	validation-rmse:3380.53651
[5]	validation-rmse:3231.86027
[6]	validation-rmse:3117.95714
[7]	validation-rmse:3032.36246
[8]	validation-rmse:2969.58997
[9]	validation-rmse:2905.83587
[10]	validation-rmse:2866.96153


[I 2024-06-17 19:54:59,091] Trial 10 finished with value: 0.6308053731918335 and parameters: {'eta': 0.15220116021122357, 'max_depth': 4, 'alpha': 0.6816701418959741}. Best is trial 9 with value: 0.6485989093780518.


[0]	validation-rmse:4362.09180
[1]	validation-rmse:4079.75363
[2]	validation-rmse:3767.28114
[3]	validation-rmse:3538.75253
[4]	validation-rmse:3347.69714
[5]	validation-rmse:3200.75531
[6]	validation-rmse:3089.39619
[7]	validation-rmse:3007.42725
[8]	validation-rmse:2946.69495
[9]	validation-rmse:2887.45114


[I 2024-06-17 19:54:59,212] Trial 11 finished with value: 0.6350773572921753 and parameters: {'eta': 0.15869309252978278, 'max_depth': 4, 'alpha': 0.49124453845293564}. Best is trial 9 with value: 0.6485989093780518.


[0]	validation-rmse:4290.91808
[1]	validation-rmse:3973.11206
[2]	validation-rmse:3626.20722
[3]	validation-rmse:3387.56466
[4]	validation-rmse:3201.04745
[5]	validation-rmse:3065.81687
[6]	validation-rmse:2975.00373
[7]	validation-rmse:2899.38221
[8]	validation-rmse:2853.13154
[9]	validation-rmse:2813.04315
[10]	validation-rmse:2790.54990


[I 2024-06-17 19:54:59,349] Trial 12 finished with value: 0.6502230167388916 and parameters: {'eta': 0.1922155072867085, 'max_depth': 4, 'alpha': 0.3722154033828167}. Best is trial 12 with value: 0.6502230167388916.


[0]	validation-rmse:4276.27646
[1]	validation-rmse:3952.07904
[2]	validation-rmse:3599.06690
[3]	validation-rmse:3358.76747
[4]	validation-rmse:3173.62065
[5]	validation-rmse:3039.63536
[6]	validation-rmse:2948.82858
[7]	validation-rmse:2881.76271
[8]	validation-rmse:2835.87766
[9]	validation-rmse:2780.62192


[I 2024-06-17 19:54:59,464] Trial 13 finished with value: 0.6582807302474976 and parameters: {'eta': 0.19921437701258643, 'max_depth': 4, 'alpha': 0.0002546936648282483}. Best is trial 13 with value: 0.6582807302474976.


[0]	validation-rmse:4407.04392
[1]	validation-rmse:4151.95002
[2]	validation-rmse:3865.71012
[3]	validation-rmse:3648.23689
[4]	validation-rmse:3459.38415
[5]	validation-rmse:3307.98142
[6]	validation-rmse:3189.40453
[7]	validation-rmse:3101.84768
[8]	validation-rmse:3032.42537
[9]	validation-rmse:2965.30497
[10]	validation-rmse:2920.75993


[I 2024-06-17 19:54:59,606] Trial 14 finished with value: 0.6168195009231567 and parameters: {'eta': 0.1379181548841278, 'max_depth': 4, 'alpha': 0.33444334925496105}. Best is trial 13 with value: 0.6582807302474976.


[0]	validation-rmse:4305.43131
[1]	validation-rmse:4003.68135
[2]	validation-rmse:3702.36666
[3]	validation-rmse:3478.69451
[4]	validation-rmse:3321.50382
[5]	validation-rmse:3196.04652
[6]	validation-rmse:3097.32542
[7]	validation-rmse:3025.70938
[8]	validation-rmse:2975.63244
[9]	validation-rmse:2927.36946


[I 2024-06-17 19:54:59,724] Trial 15 finished with value: 0.62416011095047 and parameters: {'eta': 0.19396727334469374, 'max_depth': 3, 'alpha': 0.0177016819896349}. Best is trial 13 with value: 0.6582807302474976.


[0]	validation-rmse:4389.30214
[1]	validation-rmse:4122.40848
[2]	validation-rmse:3825.28143
[3]	validation-rmse:3603.13726
[4]	validation-rmse:3412.92007
[5]	validation-rmse:3262.91841
[6]	validation-rmse:3146.49925
[7]	validation-rmse:3059.44955
[8]	validation-rmse:2993.64998
[9]	validation-rmse:2928.77766
[10]	validation-rmse:2890.91282


[I 2024-06-17 19:54:59,879] Trial 16 finished with value: 0.6246109008789062 and parameters: {'eta': 0.1460828708841479, 'max_depth': 4, 'alpha': 0.6284758595242748}. Best is trial 13 with value: 0.6582807302474976.


[0]	validation-rmse:4350.56688
[1]	validation-rmse:4070.09312
[2]	validation-rmse:3786.26010
[3]	validation-rmse:3570.12515
[4]	validation-rmse:3412.56232
[5]	validation-rmse:3282.12707
[6]	validation-rmse:3186.87396
[7]	validation-rmse:3104.53130
[8]	validation-rmse:3049.98969
[9]	validation-rmse:2987.21773
[10]	validation-rmse:2957.30401


[I 2024-06-17 19:55:00,057] Trial 17 finished with value: 0.6071709394454956 and parameters: {'eta': 0.17161379822920417, 'max_depth': 3, 'alpha': 0.3812714828616274}. Best is trial 13 with value: 0.6582807302474976.


[0]	validation-rmse:4267.68975
[1]	validation-rmse:3937.68303
[2]	validation-rmse:3552.50168
[3]	validation-rmse:3295.88121
[4]	validation-rmse:3094.50440
[5]	validation-rmse:2929.32077
[6]	validation-rmse:2825.57834
[7]	validation-rmse:2767.22328
[8]	validation-rmse:2725.40809
[9]	validation-rmse:2693.20889


[I 2024-06-17 19:55:00,213] Trial 18 finished with value: 0.6795485019683838 and parameters: {'eta': 0.19878496803621623, 'max_depth': 5, 'alpha': 0.596940166077746}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4340.99136
[1]	validation-rmse:4045.56142
[2]	validation-rmse:3698.46798
[3]	validation-rmse:3461.40608
[4]	validation-rmse:3255.22491
[5]	validation-rmse:3077.66600
[6]	validation-rmse:2956.38672
[7]	validation-rmse:2879.72696
[8]	validation-rmse:2824.96608
[9]	validation-rmse:2774.71924
[10]	validation-rmse:2742.67445


[I 2024-06-17 19:55:00,383] Trial 19 finished with value: 0.6621217727661133 and parameters: {'eta': 0.16484256984934126, 'max_depth': 5, 'alpha': 0.6385746399711245}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4354.67416
[1]	validation-rmse:4066.54385
[2]	validation-rmse:3727.51409
[3]	validation-rmse:3493.26811
[4]	validation-rmse:3287.14433
[5]	validation-rmse:3108.13673
[6]	validation-rmse:2983.81998
[7]	validation-rmse:2903.23126
[8]	validation-rmse:2845.81567
[9]	validation-rmse:2794.57206


[I 2024-06-17 19:55:00,544] Trial 20 finished with value: 0.6578637361526489 and parameters: {'eta': 0.15859641943219105, 'max_depth': 5, 'alpha': 0.6217135457354654}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4310.16667
[1]	validation-rmse:3999.26204
[2]	validation-rmse:3635.08041
[3]	validation-rmse:3387.95811
[4]	validation-rmse:3183.18676
[5]	validation-rmse:3009.74926
[6]	validation-rmse:2896.33814
[7]	validation-rmse:2827.07960
[8]	validation-rmse:2778.21514
[9]	validation-rmse:2740.07830


[I 2024-06-17 19:55:00,763] Trial 21 finished with value: 0.6670445203781128 and parameters: {'eta': 0.17901484154494587, 'max_depth': 5, 'alpha': 0.7075126956238023}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4308.76589
[1]	validation-rmse:3997.19014
[2]	validation-rmse:3632.26661
[3]	validation-rmse:3384.93443
[4]	validation-rmse:3180.26197
[5]	validation-rmse:3007.05202
[6]	validation-rmse:2893.99769
[7]	validation-rmse:2825.08050
[8]	validation-rmse:2776.48611
[9]	validation-rmse:2738.61491
[10]	validation-rmse:2721.40703


[I 2024-06-17 19:55:00,990] Trial 22 finished with value: 0.6673414707183838 and parameters: {'eta': 0.17966231414818287, 'max_depth': 5, 'alpha': 0.7396716737021383}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4298.69173
[1]	validation-rmse:3982.34631
[2]	validation-rmse:3612.33840
[3]	validation-rmse:3363.24830
[4]	validation-rmse:3157.88845
[5]	validation-rmse:2987.31484
[6]	validation-rmse:2873.94181
[7]	validation-rmse:2809.50036
[8]	validation-rmse:2758.29582
[9]	validation-rmse:2724.33003
[10]	validation-rmse:2700.83601


[I 2024-06-17 19:55:01,152] Trial 23 finished with value: 0.672351598739624 and parameters: {'eta': 0.18432756358264216, 'max_depth': 5, 'alpha': 0.7559327485841055}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4408.34012
[1]	validation-rmse:4151.35087
[2]	validation-rmse:3845.15848
[3]	validation-rmse:3618.82797
[4]	validation-rmse:3416.71409
[5]	validation-rmse:3236.22081
[6]	validation-rmse:3100.41702
[7]	validation-rmse:3008.62717
[8]	validation-rmse:2938.80457
[9]	validation-rmse:2874.91590
[10]	validation-rmse:2831.50084


[I 2024-06-17 19:55:01,303] Trial 24 finished with value: 0.6398818492889404 and parameters: {'eta': 0.13435029079385316, 'max_depth': 5, 'alpha': 0.8093147609116156}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4299.07106
[1]	validation-rmse:3982.90150
[2]	validation-rmse:3613.08808
[3]	validation-rmse:3364.04657
[4]	validation-rmse:3158.65592
[5]	validation-rmse:2988.01671
[6]	validation-rmse:2874.54494
[7]	validation-rmse:2810.00942
[8]	validation-rmse:2758.73629
[9]	validation-rmse:2724.70525
[10]	validation-rmse:2701.15490


[I 2024-06-17 19:55:01,439] Trial 25 finished with value: 0.6722741723060608 and parameters: {'eta': 0.18415166300773606, 'max_depth': 5, 'alpha': 0.9580391884310017}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4295.80061
[1]	validation-rmse:3978.12269
[2]	validation-rmse:3606.63973
[3]	validation-rmse:3357.18603
[4]	validation-rmse:3152.07048
[5]	validation-rmse:2982.00263
[6]	validation-rmse:2869.38463
[7]	validation-rmse:2805.65963
[8]	validation-rmse:2756.12627
[9]	validation-rmse:2722.68288
[10]	validation-rmse:2699.69861


[I 2024-06-17 19:55:01,602] Trial 26 finished with value: 0.6726274490356445 and parameters: {'eta': 0.18566937863412433, 'max_depth': 5, 'alpha': 0.9736692449258676}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4291.52857
[1]	validation-rmse:3971.90353
[2]	validation-rmse:3598.26364
[3]	validation-rmse:3348.37563
[4]	validation-rmse:3143.66221
[5]	validation-rmse:2974.33438
[6]	validation-rmse:2864.12863
[7]	validation-rmse:2801.12730
[8]	validation-rmse:2752.50210
[9]	validation-rmse:2719.79715
[10]	validation-rmse:2697.10651


[I 2024-06-17 19:55:01,731] Trial 27 finished with value: 0.6732558012008667 and parameters: {'eta': 0.18765444172747353, 'max_depth': 5, 'alpha': 0.8862772940385016}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4390.23454
[1]	validation-rmse:4122.28624
[2]	validation-rmse:3804.56165
[3]	validation-rmse:3579.16123
[4]	validation-rmse:3374.84230
[5]	validation-rmse:3194.60170
[6]	validation-rmse:3063.42076
[7]	validation-rmse:2974.40696
[8]	validation-rmse:2907.89827
[9]	validation-rmse:2848.22047
[10]	validation-rmse:2806.70555


[I 2024-06-17 19:55:01,882] Trial 28 finished with value: 0.6461611986160278 and parameters: {'eta': 0.14248669349208776, 'max_depth': 5, 'alpha': 0.9974278304426003}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4429.67379
[1]	validation-rmse:4186.17618
[2]	validation-rmse:3895.15999
[3]	validation-rmse:3676.24664
[4]	validation-rmse:3477.74321
[5]	validation-rmse:3297.33309
[6]	validation-rmse:3158.90081
[7]	validation-rmse:3063.64731
[8]	validation-rmse:2989.77432
[9]	validation-rmse:2921.46859
[10]	validation-rmse:2873.28878


[I 2024-06-17 19:55:02,047] Trial 29 finished with value: 0.629173994064331 and parameters: {'eta': 0.1248182050020317, 'max_depth': 5, 'alpha': 0.8821185404102052}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4348.65424
[1]	validation-rmse:4057.27995
[2]	validation-rmse:3714.66575
[3]	validation-rmse:3479.13802
[4]	validation-rmse:3272.94169
[5]	validation-rmse:3094.53166
[6]	validation-rmse:2971.52656
[7]	validation-rmse:2892.37613
[8]	validation-rmse:2836.21343
[9]	validation-rmse:2786.85858
[10]	validation-rmse:2752.99645


[I 2024-06-17 19:55:02,185] Trial 30 finished with value: 0.6595737934112549 and parameters: {'eta': 0.1613412306726963, 'max_depth': 5, 'alpha': 0.8790979529304119}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4288.38246
[1]	validation-rmse:3967.34036
[2]	validation-rmse:3592.12968
[3]	validation-rmse:3342.42234
[4]	validation-rmse:3138.01327
[5]	validation-rmse:2969.14771
[6]	validation-rmse:2859.75269
[7]	validation-rmse:2797.59767
[8]	validation-rmse:2749.54022
[9]	validation-rmse:2717.08917
[10]	validation-rmse:2694.85904


[I 2024-06-17 19:55:02,311] Trial 31 finished with value: 0.6738001108169556 and parameters: {'eta': 0.1891180755960951, 'max_depth': 5, 'alpha': 0.8015388751670346}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4284.66176
[1]	validation-rmse:3961.96213
[2]	validation-rmse:3584.91239
[3]	validation-rmse:3334.80758
[4]	validation-rmse:3130.77800
[5]	validation-rmse:2962.60795
[6]	validation-rmse:2854.19056
[7]	validation-rmse:2792.95270
[8]	validation-rmse:2745.55417
[9]	validation-rmse:2713.44191
[10]	validation-rmse:2689.48275


[I 2024-06-17 19:55:02,449] Trial 32 finished with value: 0.6751003861427307 and parameters: {'eta': 0.1908512124202653, 'max_depth': 5, 'alpha': 0.8404044047405526}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4282.41089
[1]	validation-rmse:3958.71813
[2]	validation-rmse:3580.56568
[3]	validation-rmse:3330.23139
[4]	validation-rmse:3126.44203
[5]	validation-rmse:2958.69981
[6]	validation-rmse:2850.87643
[7]	validation-rmse:2790.19171
[8]	validation-rmse:2742.21171
[9]	validation-rmse:2705.57195
[10]	validation-rmse:2682.27297


[I 2024-06-17 19:55:02,582] Trial 33 finished with value: 0.6768399477005005 and parameters: {'eta': 0.19190062654050438, 'max_depth': 5, 'alpha': 0.568233054681522}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4275.42459
[1]	validation-rmse:3948.69620
[2]	validation-rmse:3567.16878
[3]	validation-rmse:3316.17353
[4]	validation-rmse:3113.83545
[5]	validation-rmse:2947.60448
[6]	validation-rmse:2841.68465
[7]	validation-rmse:2781.59502
[8]	validation-rmse:2737.63941
[9]	validation-rmse:2703.73699
[10]	validation-rmse:2680.61727


[I 2024-06-17 19:55:02,710] Trial 34 finished with value: 0.6772388219833374 and parameters: {'eta': 0.1951633649510025, 'max_depth': 5, 'alpha': 0.5566860075164938}. Best is trial 18 with value: 0.6795485019683838.


[0]	validation-rmse:4266.52127
[1]	validation-rmse:3936.02694
[2]	validation-rmse:3550.30114
[3]	validation-rmse:3293.57800
[4]	validation-rmse:3092.35562
[5]	validation-rmse:2927.41326
[6]	validation-rmse:2823.98641
[7]	validation-rmse:2765.89193
[8]	validation-rmse:2724.28986
[9]	validation-rmse:2692.28381


[I 2024-06-17 19:55:02,850] Trial 35 finished with value: 0.6797381639480591 and parameters: {'eta': 0.19933286691792954, 'max_depth': 5, 'alpha': 0.5481416786496501}. Best is trial 35 with value: 0.6797381639480591.


[0]	validation-rmse:4267.30031
[1]	validation-rmse:3937.13082
[2]	validation-rmse:3551.76786
[3]	validation-rmse:3295.11287
[4]	validation-rmse:3093.78728
[5]	validation-rmse:2928.68395
[6]	validation-rmse:2825.04665
[7]	validation-rmse:2766.77848
[8]	validation-rmse:2725.03436
[9]	validation-rmse:2692.89966


[I 2024-06-17 19:55:02,991] Trial 36 finished with value: 0.6796119213104248 and parameters: {'eta': 0.1989675444967374, 'max_depth': 5, 'alpha': 0.544233919095614}. Best is trial 35 with value: 0.6797381639480591.


[0]	validation-rmse:4265.24930
[1]	validation-rmse:3934.22619
[2]	validation-rmse:3547.90986
[3]	validation-rmse:3292.88548
[4]	validation-rmse:3092.21826
[5]	validation-rmse:2928.06768
[6]	validation-rmse:2823.12826
[7]	validation-rmse:2768.13891
[8]	validation-rmse:2726.72095
[9]	validation-rmse:2696.89111
[10]	validation-rmse:2675.12392


[I 2024-06-17 19:55:03,125] Trial 37 finished with value: 0.6785603761672974 and parameters: {'eta': 0.199929653267105, 'max_depth': 5, 'alpha': 0.568379226006396}. Best is trial 35 with value: 0.6797381639480591.


[0]	validation-rmse:4265.10546
[1]	validation-rmse:3934.02272
[2]	validation-rmse:3547.63979
[3]	validation-rmse:3292.60389
[4]	validation-rmse:3091.95620
[5]	validation-rmse:2927.83583
[6]	validation-rmse:2822.93564
[7]	validation-rmse:2767.97823
[8]	validation-rmse:2726.58573
[9]	validation-rmse:2696.78039
[10]	validation-rmse:2675.03041


[I 2024-06-17 19:55:03,250] Trial 38 finished with value: 0.6785827875137329 and parameters: {'eta': 0.19999715378936084, 'max_depth': 5, 'alpha': 0.5435601876612952}. Best is trial 35 with value: 0.6797381639480591.


[0]	validation-rmse:4321.84963
[1]	validation-rmse:4016.65135
[2]	validation-rmse:3658.77293
[3]	validation-rmse:3418.34430
[4]	validation-rmse:3212.70843
[5]	validation-rmse:3037.80855
[6]	validation-rmse:2921.05207
[7]	validation-rmse:2849.28561
[8]	validation-rmse:2797.32106
[9]	validation-rmse:2751.90057
[10]	validation-rmse:2723.25908


[I 2024-06-17 19:55:03,411] Trial 39 finished with value: 0.6668885350227356 and parameters: {'eta': 0.17362655818027584, 'max_depth': 5, 'alpha': 0.4381216144034148}. Best is trial 35 with value: 0.6797381639480591.


[0]	validation-rmse:4601.44420
[1]	validation-rmse:4489.16252
[2]	validation-rmse:4350.71551
[3]	validation-rmse:4234.87823
[4]	validation-rmse:4115.38822
[5]	validation-rmse:3998.87669
[6]	validation-rmse:3892.43744
[7]	validation-rmse:3805.24702
[8]	validation-rmse:3730.31743
[9]	validation-rmse:3647.05770
[10]	validation-rmse:3578.89568


[I 2024-06-17 19:55:03,550] Trial 40 finished with value: 0.42467963695526123 and parameters: {'eta': 0.0500231197652172, 'max_depth': 5, 'alpha': 0.5268848140161837}. Best is trial 35 with value: 0.6797381639480591.


[0]	validation-rmse:4268.38427
[1]	validation-rmse:3938.66835
[2]	validation-rmse:3553.81165
[3]	validation-rmse:3297.25308
[4]	validation-rmse:3095.78536
[5]	validation-rmse:2930.45896
[6]	validation-rmse:2826.52907
[7]	validation-rmse:2768.01898
[8]	validation-rmse:2726.07679
[9]	validation-rmse:2693.76236


[I 2024-06-17 19:55:03,722] Trial 41 finished with value: 0.6794348955154419 and parameters: {'eta': 0.1984593352632731, 'max_depth': 5, 'alpha': 0.5769536673126218}. Best is trial 35 with value: 0.6797381639480591.


[0]	validation-rmse:4265.36568
[1]	validation-rmse:3934.39087
[2]	validation-rmse:3548.12839
[3]	validation-rmse:3291.30570
[4]	validation-rmse:3090.23806
[5]	validation-rmse:2925.53555
[6]	validation-rmse:2822.42122
[7]	validation-rmse:2764.25403
[8]	validation-rmse:2722.88260
[9]	validation-rmse:2691.08459


[I 2024-06-17 19:55:03,870] Trial 42 finished with value: 0.679989218711853 and parameters: {'eta': 0.1998750163296871, 'max_depth': 5, 'alpha': 0.4386990351118196}. Best is trial 42 with value: 0.679989218711853.


[0]	validation-rmse:4459.80667
[1]	validation-rmse:4236.42191
[2]	validation-rmse:3969.92136
[3]	validation-rmse:3763.85643
[4]	validation-rmse:3571.96053
[5]	validation-rmse:3394.61715
[6]	validation-rmse:3254.69903
[7]	validation-rmse:3154.42337
[8]	validation-rmse:3076.56449
[9]	validation-rmse:2995.85082
[10]	validation-rmse:2940.51709


[I 2024-06-17 19:55:04,072] Trial 43 finished with value: 0.6116180419921875 and parameters: {'eta': 0.11145233206309102, 'max_depth': 5, 'alpha': 0.45668484034270856}. Best is trial 42 with value: 0.679989218711853.


[0]	validation-rmse:4333.17212
[1]	validation-rmse:4033.68918
[2]	validation-rmse:3682.12094
[3]	validation-rmse:3443.60413
[4]	validation-rmse:3237.55988
[5]	validation-rmse:3060.96935
[6]	validation-rmse:2941.51084
[7]	validation-rmse:2867.04971
[8]	validation-rmse:2813.82406
[9]	validation-rmse:2766.02906


[I 2024-06-17 19:55:04,286] Trial 44 finished with value: 0.6639304161071777 and parameters: {'eta': 0.16842411654446143, 'max_depth': 5, 'alpha': 0.2620534811999461}. Best is trial 42 with value: 0.679989218711853.


[0]	validation-rmse:4313.14725
[1]	validation-rmse:4003.67987
[2]	validation-rmse:3641.08630
[3]	validation-rmse:3394.42175
[4]	validation-rmse:3189.48660
[5]	validation-rmse:3015.57630
[6]	validation-rmse:2901.41635
[7]	validation-rmse:2831.42849
[8]	validation-rmse:2781.98772
[9]	validation-rmse:2743.25012
[10]	validation-rmse:2725.27832


[I 2024-06-17 19:55:04,450] Trial 45 finished with value: 0.6663943529129028 and parameters: {'eta': 0.17763818612100055, 'max_depth': 5, 'alpha': 0.4926892822397266}. Best is trial 42 with value: 0.679989218711853.


[0]	validation-rmse:4547.32703
[1]	validation-rmse:4390.50864
[2]	validation-rmse:4209.16091
[3]	validation-rmse:4055.94151
[4]	validation-rmse:3910.94875
[5]	validation-rmse:3778.84594
[6]	validation-rmse:3660.85697
[7]	validation-rmse:3563.31250
[8]	validation-rmse:3481.25252
[9]	validation-rmse:3395.80019


[I 2024-06-17 19:55:04,650] Trial 46 finished with value: 0.5024365186691284 and parameters: {'eta': 0.07481240774107915, 'max_depth': 4, 'alpha': 0.6033234361765605}. Best is trial 42 with value: 0.679989218711853.


[0]	validation-rmse:4487.97547
[1]	validation-rmse:4284.81700
[2]	validation-rmse:4040.96699
[3]	validation-rmse:3850.43305
[4]	validation-rmse:3667.63915
[5]	validation-rmse:3495.86257
[6]	validation-rmse:3356.62707
[7]	validation-rmse:3253.62123
[8]	validation-rmse:3177.32946
[9]	validation-rmse:3099.97005
[10]	validation-rmse:3038.93654


[I 2024-06-17 19:55:04,830] Trial 47 finished with value: 0.5851845741271973 and parameters: {'eta': 0.09905708740515076, 'max_depth': 5, 'alpha': 0.39908290711830285}. Best is trial 42 with value: 0.679989218711853.


[0]	validation-rmse:4282.74080
[1]	validation-rmse:3961.32231
[2]	validation-rmse:3610.96634
[3]	validation-rmse:3371.20660
[4]	validation-rmse:3185.15172
[5]	validation-rmse:3049.72481
[6]	validation-rmse:2957.25126
[7]	validation-rmse:2888.13914
[8]	validation-rmse:2841.01850
[9]	validation-rmse:2784.00574
[10]	validation-rmse:2760.64774


[I 2024-06-17 19:55:04,997] Trial 48 finished with value: 0.6576789617538452 and parameters: {'eta': 0.19611990054305042, 'max_depth': 4, 'alpha': 0.3258454979250355}. Best is trial 42 with value: 0.679989218711853.


[0]	validation-rmse:4311.32696
[1]	validation-rmse:4000.98025
[2]	validation-rmse:3637.41518
[3]	validation-rmse:3390.46924
[4]	validation-rmse:3185.65378
[5]	validation-rmse:3012.03251
[6]	validation-rmse:2898.33303
[7]	validation-rmse:2828.78902
[8]	validation-rmse:2779.70112
[9]	validation-rmse:2741.31259


[I 2024-06-17 19:55:05,181] Trial 49 finished with value: 0.6667884588241577 and parameters: {'eta': 0.17847880001982502, 'max_depth': 5, 'alpha': 0.6707173395358084}. Best is trial 42 with value: 0.679989218711853.


In [44]:
# Get the best hyperparameters
print('Best hyperparameters: ', study.best_params)

Best hyperparameters:  {'eta': 0.1998750163296871, 'max_depth': 5, 'alpha': 0.4386990351118196}


In [47]:
# Train the model with the best hyperparameters on the full training set
best_params = study.best_params
best_params['objective'] = 'reg:squarederror'
dtrain = xgb.DMatrix(train_transformed, label=train["price"])
dtest = xgb.DMatrix(test_transformed, label=test["price"])
flight = xgb.train(best_params, dtrain, num_boost_round=100)

In [52]:
with open('xgboost-best-model.pkl', 'wb') as file:
    pickle.dump(flight, file)

In [53]:
y_pred = flight.predict(dtest)

In [54]:
r2 = r2_score(test["price"], y_pred)
print(f'Test R2: {r2:.2f}')

Test R2: 0.73
