In [1]:
import os
import sys
import numpy as np
import pandas as pd

from datetime import datetime

import lightgbm as lgb

from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold

import optuna

import warnings
warnings.filterwarnings('ignore')

In [2]:
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir)))

In [3]:
from src.paths import TRANSFORMED_DATA_DIR, RESIDUALS_DATA_DIR
from src.evaluation import evaluate_metrics, save_metrics

In [4]:
df = pd.read_parquet(TRANSFORMED_DATA_DIR / 'tabular_data.parquet')
df

Unnamed: 0,rides_previous_672_hour,rides_previous_671_hour,rides_previous_670_hour,rides_previous_669_hour,rides_previous_668_hour,rides_previous_667_hour,rides_previous_666_hour,rides_previous_665_hour,rides_previous_664_hour,rides_previous_663_hour,...,rides_previous_7_hour,rides_previous_6_hour,rides_previous_5_hour,rides_previous_4_hour,rides_previous_3_hour,rides_previous_2_hour,rides_previous_1_hour,pickup_hour,pickup_location_id,target_rides_next_hour
0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2023-01-29,1,0.0
1,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,0.0,2.0,...,2.0,1.0,1.0,0.0,0.0,0.0,0.0,2023-01-30,1,0.0
2,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,...,1.0,2.0,0.0,1.0,0.0,0.0,0.0,2023-01-31,1,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2023-02-01,1,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,1.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,2023-02-02,1,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89300,1.0,0.0,2.0,0.0,0.0,3.0,3.0,0.0,3.0,2.0,...,2.0,2.0,1.0,2.0,3.0,1.0,3.0,2023-12-27,265,3.0
89301,5.0,7.0,2.0,1.0,0.0,1.0,2.0,2.0,3.0,3.0,...,6.0,4.0,2.0,4.0,10.0,3.0,3.0,2023-12-28,265,1.0
89302,5.0,3.0,2.0,3.0,1.0,3.0,1.0,5.0,3.0,1.0,...,3.0,1.0,8.0,5.0,1.0,0.0,6.0,2023-12-29,265,5.0
89303,3.0,4.0,9.0,4.0,1.0,2.0,0.0,0.0,0.0,2.0,...,6.0,3.0,2.0,2.0,5.0,1.0,5.0,2023-12-30,265,2.0


In [5]:
from src.data_split import train_test_split

In [6]:
X_train, y_train, X_test, y_test = train_test_split(df, 
                                                    cutoff_date=datetime(2023, 6, 1, 0, 0, 0),
                                                    target_column_name='target_rides_next_hour')

In [7]:
print(f'Datasets shapes')
print(f'X_train: {X_train.shape}')
print(f'y_train: {y_train.shape}')
print(f'X_test: {X_test.shape}')
print(f'y_test: {y_test.shape}')

Datasets shapes
X_train: (32595, 674)
y_train: (32595,)
X_test: (56710, 674)
y_test: (56710,)


In [8]:
from src.model import get_pipeline

In [9]:
def objective(trial: optuna.trial.Trial) -> float:

    '''
    Define the objective function to optimize the hyperparameters of the model
    Split data into folds for training and validation
    '''

    # Define the hyperparameters to optimize
    hyperparameters = {
        'metric': 'mae',
        'verbose': -1,
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.2, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.2, 1.0),
        'min_child_samples': trial.suggest_int('min_child_samples', 3, 100),
    }

    tss = KFold(n_splits=3)
    scores = []

    for train_index, val_index in tss.split(X_train):
        # Split the data into training and validation sets
        X_train_, X_val_ = X_train.iloc[train_index, :], X_train.iloc[val_index, :]
        y_train_, y_val_ = y_train.iloc[train_index], y_train.iloc[val_index]

        # Train the model
        pipeline = get_pipeline(**hyperparameters)
        pipeline.fit(X_train_, y_train_)

        # Evaluate the model
        y_pred = pipeline.predict(X_val_)
        mae = mean_absolute_error(y_val_, y_pred)
        scores.append(mae)

    return np.array(scores).mean()

In [10]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

[I 2024-05-20 16:37:22,716] A new study created in memory with name: no-name-eed932b3-b1f8-45f8-8c29-99efca951263




[I 2024-05-20 16:37:41,548] Trial 0 finished with value: 2.577514515351623 and parameters: {'num_leaves': 43, 'feature_fraction': 0.47876925473391774, 'bagging_fraction': 0.392995726851379, 'min_child_samples': 25}. Best is trial 0 with value: 2.577514515351623.




[I 2024-05-20 16:38:01,098] Trial 1 finished with value: 2.581663449350201 and parameters: {'num_leaves': 40, 'feature_fraction': 0.906609712394846, 'bagging_fraction': 0.6112575881703828, 'min_child_samples': 14}. Best is trial 0 with value: 2.577514515351623.




[I 2024-05-20 16:38:38,486] Trial 2 finished with value: 2.542230843063005 and parameters: {'num_leaves': 129, 'feature_fraction': 0.5755546632987774, 'bagging_fraction': 0.43929307706667386, 'min_child_samples': 78}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:38:53,125] Trial 3 finished with value: 2.6295993038832104 and parameters: {'num_leaves': 21, 'feature_fraction': 0.8185558693539201, 'bagging_fraction': 0.2187912903094933, 'min_child_samples': 60}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:39:35,186] Trial 4 finished with value: 2.6700693381401712 and parameters: {'num_leaves': 159, 'feature_fraction': 0.5150086614078015, 'bagging_fraction': 0.7786926241662051, 'min_child_samples': 10}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:40:01,937] Trial 5 finished with value: 2.605184567174364 and parameters: {'num_leaves': 139, 'feature_fraction': 0.24915944056927702, 'bagging_fraction': 0.7340821801343236, 'min_child_samples': 56}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:40:14,786] Trial 6 finished with value: 2.573158646445771 and parameters: {'num_leaves': 22, 'feature_fraction': 0.632894097247408, 'bagging_fraction': 0.3201090047069888, 'min_child_samples': 30}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:40:40,836] Trial 7 finished with value: 2.8085906134535326 and parameters: {'num_leaves': 100, 'feature_fraction': 0.24932644202279466, 'bagging_fraction': 0.20786157810225464, 'min_child_samples': 7}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:41:40,126] Trial 8 finished with value: 2.586254026637761 and parameters: {'num_leaves': 197, 'feature_fraction': 0.5811984967026858, 'bagging_fraction': 0.4674680457238231, 'min_child_samples': 57}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:42:41,387] Trial 9 finished with value: 2.6729489674541536 and parameters: {'num_leaves': 182, 'feature_fraction': 0.8953447912109569, 'bagging_fraction': 0.9906985762765155, 'min_child_samples': 93}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:43:37,567] Trial 10 finished with value: 2.6330069211025897 and parameters: {'num_leaves': 251, 'feature_fraction': 0.7239453667039697, 'bagging_fraction': 0.5346060193501665, 'min_child_samples': 92}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:44:17,233] Trial 11 finished with value: 2.6640769942675253 and parameters: {'num_leaves': 90, 'feature_fraction': 0.6700229109842757, 'bagging_fraction': 0.35923500638534905, 'min_child_samples': 36}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:44:55,142] Trial 12 finished with value: 2.5633237257640995 and parameters: {'num_leaves': 92, 'feature_fraction': 0.424098256113811, 'bagging_fraction': 0.33743353293643674, 'min_child_samples': 79}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:45:28,521] Trial 13 finished with value: 2.5588392052752504 and parameters: {'num_leaves': 93, 'feature_fraction': 0.37955510024356015, 'bagging_fraction': 0.4990001845676455, 'min_child_samples': 76}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:46:04,019] Trial 14 finished with value: 2.5500032404881408 and parameters: {'num_leaves': 118, 'feature_fraction': 0.3472596440200228, 'bagging_fraction': 0.629887575697178, 'min_child_samples': 74}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:46:30,268] Trial 15 finished with value: 2.5602598642312047 and parameters: {'num_leaves': 129, 'feature_fraction': 0.34370711983837865, 'bagging_fraction': 0.6518949052356413, 'min_child_samples': 74}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:47:13,485] Trial 16 finished with value: 2.606771499206605 and parameters: {'num_leaves': 217, 'feature_fraction': 0.5411679979097194, 'bagging_fraction': 0.8593254219840017, 'min_child_samples': 67}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:47:38,703] Trial 17 finished with value: 2.6319884239295916 and parameters: {'num_leaves': 122, 'feature_fraction': 0.20026889914972587, 'bagging_fraction': 0.6850332563738305, 'min_child_samples': 46}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:48:07,370] Trial 18 finished with value: 2.6358607231132885 and parameters: {'num_leaves': 67, 'feature_fraction': 0.7505058270955256, 'bagging_fraction': 0.5592009159878144, 'min_child_samples': 85}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:48:37,759] Trial 19 finished with value: 2.5990901025563216 and parameters: {'num_leaves': 166, 'feature_fraction': 0.3454175586095911, 'bagging_fraction': 0.4382813248407061, 'min_child_samples': 45}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:49:10,213] Trial 20 finished with value: 2.6554415132142064 and parameters: {'num_leaves': 153, 'feature_fraction': 0.49445515404834345, 'bagging_fraction': 0.8946247759211808, 'min_child_samples': 100}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:49:35,365] Trial 21 finished with value: 2.580576940601647 and parameters: {'num_leaves': 115, 'feature_fraction': 0.3730179876180231, 'bagging_fraction': 0.5055243353303526, 'min_child_samples': 73}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:50:01,955] Trial 22 finished with value: 2.5504824193271176 and parameters: {'num_leaves': 72, 'feature_fraction': 0.41822514178069914, 'bagging_fraction': 0.5915472428346458, 'min_child_samples': 66}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:50:25,637] Trial 23 finished with value: 2.548838507731353 and parameters: {'num_leaves': 66, 'feature_fraction': 0.44078603835242725, 'bagging_fraction': 0.6296587761075991, 'min_child_samples': 64}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:50:44,494] Trial 24 finished with value: 2.5874265503062044 and parameters: {'num_leaves': 56, 'feature_fraction': 0.29170467838637754, 'bagging_fraction': 0.6938431093143316, 'min_child_samples': 84}. Best is trial 2 with value: 2.542230843063005.




[I 2024-05-20 16:51:19,425] Trial 25 finished with value: 2.537900371330329 and parameters: {'num_leaves': 108, 'feature_fraction': 0.4505420991346856, 'bagging_fraction': 0.773706867245351, 'min_child_samples': 65}. Best is trial 25 with value: 2.537900371330329.




[I 2024-05-20 16:51:31,755] Trial 26 finished with value: 2.6981584210951453 and parameters: {'num_leaves': 8, 'feature_fraction': 0.5830872084248272, 'bagging_fraction': 0.8149561940703273, 'min_child_samples': 66}. Best is trial 25 with value: 2.537900371330329.




[I 2024-05-20 16:51:58,617] Trial 27 finished with value: 2.6020539574816666 and parameters: {'num_leaves': 79, 'feature_fraction': 0.4463226133482249, 'bagging_fraction': 0.7329640598631231, 'min_child_samples': 50}. Best is trial 25 with value: 2.537900371330329.




[I 2024-05-20 16:52:45,849] Trial 28 finished with value: 2.6054210012142653 and parameters: {'num_leaves': 147, 'feature_fraction': 0.5512331424922793, 'bagging_fraction': 0.9360948650642007, 'min_child_samples': 62}. Best is trial 25 with value: 2.537900371330329.




[I 2024-05-20 16:53:15,625] Trial 29 finished with value: 2.6595834891440653 and parameters: {'num_leaves': 109, 'feature_fraction': 0.4741297149653599, 'bagging_fraction': 0.4311447078276227, 'min_child_samples': 41}. Best is trial 25 with value: 2.537900371330329.




[I 2024-05-20 16:53:40,668] Trial 30 finished with value: 2.666498187535104 and parameters: {'num_leaves': 46, 'feature_fraction': 0.9952575838144114, 'bagging_fraction': 0.2767716554287081, 'min_child_samples': 53}. Best is trial 25 with value: 2.537900371330329.




[I 2024-05-20 16:54:17,321] Trial 31 finished with value: 2.56266431995001 and parameters: {'num_leaves': 135, 'feature_fraction': 0.4677557672769279, 'bagging_fraction': 0.6251255520220468, 'min_child_samples': 70}. Best is trial 25 with value: 2.537900371330329.




[I 2024-05-20 16:54:56,960] Trial 32 finished with value: 2.536943483959074 and parameters: {'num_leaves': 110, 'feature_fraction': 0.6274565145921149, 'bagging_fraction': 0.7905579987217446, 'min_child_samples': 82}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 16:55:23,171] Trial 33 finished with value: 2.588584272251116 and parameters: {'num_leaves': 58, 'feature_fraction': 0.6743630517148362, 'bagging_fraction': 0.8019549989769471, 'min_child_samples': 83}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 16:55:44,351] Trial 34 finished with value: 2.5955846809219487 and parameters: {'num_leaves': 35, 'feature_fraction': 0.6244136868988903, 'bagging_fraction': 0.7512807023799526, 'min_child_samples': 89}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 16:56:28,138] Trial 35 finished with value: 2.6286996494490773 and parameters: {'num_leaves': 169, 'feature_fraction': 0.5238507257859928, 'bagging_fraction': 0.8452811039088396, 'min_child_samples': 62}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 16:57:05,478] Trial 36 finished with value: 2.6324241063394096 and parameters: {'num_leaves': 104, 'feature_fraction': 0.7642527920955954, 'bagging_fraction': 0.6803004273044093, 'min_child_samples': 97}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 16:57:37,619] Trial 37 finished with value: 2.575988914025576 and parameters: {'num_leaves': 82, 'feature_fraction': 0.6699709225047572, 'bagging_fraction': 0.7521788765180574, 'min_child_samples': 81}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 16:58:18,557] Trial 38 finished with value: 2.5967598018176754 and parameters: {'num_leaves': 131, 'feature_fraction': 0.6037895377765964, 'bagging_fraction': 0.5836017847019406, 'min_child_samples': 58}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 16:59:04,327] Trial 39 finished with value: 2.5373283548159082 and parameters: {'num_leaves': 147, 'feature_fraction': 0.8079809520713782, 'bagging_fraction': 0.27038908253500893, 'min_child_samples': 19}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 16:59:48,535] Trial 40 finished with value: 2.5722674199020727 and parameters: {'num_leaves': 141, 'feature_fraction': 0.8437108619333503, 'bagging_fraction': 0.25690074563613857, 'min_child_samples': 25}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 17:00:40,882] Trial 41 finished with value: 2.5924600007267267 and parameters: {'num_leaves': 185, 'feature_fraction': 0.8160708565110484, 'bagging_fraction': 0.37651988016003757, 'min_child_samples': 14}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 17:01:16,988] Trial 42 finished with value: 2.6148075693120885 and parameters: {'num_leaves': 107, 'feature_fraction': 0.9509938464847172, 'bagging_fraction': 0.2993908015011788, 'min_child_samples': 13}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 17:01:56,615] Trial 43 finished with value: 2.6082835560856936 and parameters: {'num_leaves': 155, 'feature_fraction': 0.711895895146043, 'bagging_fraction': 0.4366606443199947, 'min_child_samples': 89}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 17:02:28,012] Trial 44 finished with value: 2.5786849232492197 and parameters: {'num_leaves': 119, 'feature_fraction': 0.5517582855846911, 'bagging_fraction': 0.20826212927377002, 'min_child_samples': 78}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 17:03:21,841] Trial 45 finished with value: 2.5399878968370855 and parameters: {'num_leaves': 207, 'feature_fraction': 0.8763190976057046, 'bagging_fraction': 0.9343334572884158, 'min_child_samples': 70}. Best is trial 32 with value: 2.536943483959074.




[I 2024-05-20 17:04:16,685] Trial 46 finished with value: 2.5352490902606153 and parameters: {'num_leaves': 221, 'feature_fraction': 0.859372243239578, 'bagging_fraction': 0.9479088961265093, 'min_child_samples': 70}. Best is trial 46 with value: 2.5352490902606153.




[I 2024-05-20 17:05:28,397] Trial 47 finished with value: 2.70913712657659 and parameters: {'num_leaves': 227, 'feature_fraction': 0.8497888928494439, 'bagging_fraction': 0.9858990168591925, 'min_child_samples': 4}. Best is trial 46 with value: 2.5352490902606153.




[I 2024-05-20 17:06:18,951] Trial 48 finished with value: 2.5389008368440966 and parameters: {'num_leaves': 247, 'feature_fraction': 0.9151318031169401, 'bagging_fraction': 0.9227886556618077, 'min_child_samples': 70}. Best is trial 46 with value: 2.5352490902606153.




[I 2024-05-20 17:07:13,407] Trial 49 finished with value: 2.674702190481822 and parameters: {'num_leaves': 256, 'feature_fraction': 0.9218215647261424, 'bagging_fraction': 0.8988260683269481, 'min_child_samples': 55}. Best is trial 46 with value: 2.5352490902606153.




[I 2024-05-20 17:08:04,667] Trial 50 finished with value: 2.5566064628691105 and parameters: {'num_leaves': 239, 'feature_fraction': 0.8059519360402888, 'bagging_fraction': 0.955953217238482, 'min_child_samples': 19}. Best is trial 46 with value: 2.5352490902606153.




[I 2024-05-20 17:09:03,147] Trial 51 finished with value: 2.53193186218385 and parameters: {'num_leaves': 205, 'feature_fraction': 0.8846548553560787, 'bagging_fraction': 0.9177091402844645, 'min_child_samples': 70}. Best is trial 51 with value: 2.53193186218385.




[I 2024-05-20 17:09:49,044] Trial 52 finished with value: 2.52150880697246 and parameters: {'num_leaves': 241, 'feature_fraction': 0.9173905717971508, 'bagging_fraction': 0.8850277325059562, 'min_child_samples': 70}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:10:32,773] Trial 53 finished with value: 2.608464077908455 and parameters: {'num_leaves': 220, 'feature_fraction': 0.9658306772304334, 'bagging_fraction': 0.8476308266268937, 'min_child_samples': 77}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:11:16,184] Trial 54 finished with value: 2.597174198238245 and parameters: {'num_leaves': 199, 'feature_fraction': 0.7772976303540299, 'bagging_fraction': 0.8832675427332705, 'min_child_samples': 59}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:12:06,400] Trial 55 finished with value: 2.6720095458038338 and parameters: {'num_leaves': 231, 'feature_fraction': 0.8633441715636583, 'bagging_fraction': 0.8019227912336061, 'min_child_samples': 32}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:12:49,926] Trial 56 finished with value: 2.644407814457418 and parameters: {'num_leaves': 181, 'feature_fraction': 0.9351885542122641, 'bagging_fraction': 0.9669349646571778, 'min_child_samples': 72}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:13:35,523] Trial 57 finished with value: 2.5711380216876503 and parameters: {'num_leaves': 242, 'feature_fraction': 0.885012882760201, 'bagging_fraction': 0.870008239125036, 'min_child_samples': 68}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:14:16,247] Trial 58 finished with value: 2.70683899129815 and parameters: {'num_leaves': 206, 'feature_fraction': 0.9966378132229662, 'bagging_fraction': 0.8354959087178772, 'min_child_samples': 88}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:15:05,975] Trial 59 finished with value: 2.6188228763102965 and parameters: {'num_leaves': 233, 'feature_fraction': 0.7890433897661445, 'bagging_fraction': 0.9131368826205493, 'min_child_samples': 50}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:15:53,671] Trial 60 finished with value: 2.6065382105094703 and parameters: {'num_leaves': 214, 'feature_fraction': 0.7426585640765877, 'bagging_fraction': 0.7645916343337573, 'min_child_samples': 80}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:16:36,639] Trial 61 finished with value: 2.6261818052463473 and parameters: {'num_leaves': 248, 'feature_fraction': 0.9084523549469172, 'bagging_fraction': 0.9285691660262243, 'min_child_samples': 75}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:17:21,994] Trial 62 finished with value: 2.5560796595396584 and parameters: {'num_leaves': 222, 'feature_fraction': 0.8324805103115639, 'bagging_fraction': 0.9993826064057406, 'min_child_samples': 64}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:18:09,056] Trial 63 finished with value: 2.585978131737346 and parameters: {'num_leaves': 240, 'feature_fraction': 0.9548717870392105, 'bagging_fraction': 0.9622891330739081, 'min_child_samples': 69}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:18:52,026] Trial 64 finished with value: 2.584722076007766 and parameters: {'num_leaves': 192, 'feature_fraction': 0.8940139220754633, 'bagging_fraction': 0.8736736873950901, 'min_child_samples': 73}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:19:42,187] Trial 65 finished with value: 2.644519315296467 and parameters: {'num_leaves': 249, 'feature_fraction': 0.9201259847962772, 'bagging_fraction': 0.7155765775082866, 'min_child_samples': 61}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:20:05,992] Trial 66 finished with value: 2.5381373439095154 and parameters: {'num_leaves': 94, 'feature_fraction': 0.7032305727284519, 'bagging_fraction': 0.8167613745216011, 'min_child_samples': 66}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:20:30,178] Trial 67 finished with value: 2.5626394900242864 and parameters: {'num_leaves': 96, 'feature_fraction': 0.6818637644841243, 'bagging_fraction': 0.7863854318500153, 'min_child_samples': 65}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:20:59,997] Trial 68 finished with value: 2.6357748679721893 and parameters: {'num_leaves': 126, 'feature_fraction': 0.7263017173818485, 'bagging_fraction': 0.8185320521510504, 'min_child_samples': 56}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:21:21,205] Trial 69 finished with value: 2.643798930706547 and parameters: {'num_leaves': 86, 'feature_fraction': 0.6292571892182041, 'bagging_fraction': 0.7778343333210007, 'min_child_samples': 46}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:21:58,180] Trial 70 finished with value: 2.572779390842687 and parameters: {'num_leaves': 175, 'feature_fraction': 0.6892865010430681, 'bagging_fraction': 0.8285192852035661, 'min_child_samples': 77}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:22:28,946] Trial 71 finished with value: 2.540093247869025 and parameters: {'num_leaves': 115, 'feature_fraction': 0.8737152745290032, 'bagging_fraction': 0.9031202617281088, 'min_child_samples': 70}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:22:57,116] Trial 72 finished with value: 2.607936324763895 and parameters: {'num_leaves': 101, 'feature_fraction': 0.830848523616105, 'bagging_fraction': 0.9230590457445724, 'min_child_samples': 82}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:23:38,153] Trial 73 finished with value: 2.6133493535999412 and parameters: {'num_leaves': 207, 'feature_fraction': 0.8052815441311342, 'bagging_fraction': 0.8582431186682041, 'min_child_samples': 72}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:24:14,371] Trial 74 finished with value: 2.5448558110435275 and parameters: {'num_leaves': 145, 'feature_fraction': 0.8507954095718482, 'bagging_fraction': 0.949431821989036, 'min_child_samples': 63}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:24:38,802] Trial 75 finished with value: 2.6792079438640113 and parameters: {'num_leaves': 76, 'feature_fraction': 0.9774792845202316, 'bagging_fraction': 0.8864667258460267, 'min_child_samples': 86}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:25:24,053] Trial 76 finished with value: 2.5665281670247997 and parameters: {'num_leaves': 225, 'feature_fraction': 0.6416023080381053, 'bagging_fraction': 0.9799965405631054, 'min_child_samples': 67}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:25:54,238] Trial 77 finished with value: 2.703959911645542 and parameters: {'num_leaves': 92, 'feature_fraction': 0.9402681546954541, 'bagging_fraction': 0.7248042213072391, 'min_child_samples': 38}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:26:42,588] Trial 78 finished with value: 2.637584933364778 and parameters: {'num_leaves': 213, 'feature_fraction': 0.9048795093133196, 'bagging_fraction': 0.6574766165359381, 'min_child_samples': 59}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:26:59,993] Trial 79 finished with value: 2.555554546600255 and parameters: {'num_leaves': 111, 'feature_fraction': 0.2907238756408411, 'bagging_fraction': 0.8002445251572259, 'min_child_samples': 74}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:27:30,760] Trial 80 finished with value: 2.6304525088895034 and parameters: {'num_leaves': 254, 'feature_fraction': 0.6011865944908801, 'bagging_fraction': 0.944775335939621, 'min_child_samples': 93}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:28:10,255] Trial 81 finished with value: 2.5618804979003977 and parameters: {'num_leaves': 233, 'feature_fraction': 0.6474412257073173, 'bagging_fraction': 0.9161612649719735, 'min_child_samples': 68}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:28:53,670] Trial 82 finished with value: 2.6173557306333315 and parameters: {'num_leaves': 202, 'feature_fraction': 0.8794516124428241, 'bagging_fraction': 0.937165001774572, 'min_child_samples': 71}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:29:34,771] Trial 83 finished with value: 2.6122789115363574 and parameters: {'num_leaves': 192, 'feature_fraction': 0.8662323600114934, 'bagging_fraction': 0.8620528500841953, 'min_child_samples': 76}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:30:14,964] Trial 84 finished with value: 2.599902214996176 and parameters: {'num_leaves': 246, 'feature_fraction': 0.7962911344729801, 'bagging_fraction': 0.8916854234024089, 'min_child_samples': 80}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:30:47,714] Trial 85 finished with value: 2.5515772019458525 and parameters: {'num_leaves': 161, 'feature_fraction': 0.5715205503025453, 'bagging_fraction': 0.4021379305584091, 'min_child_samples': 65}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:31:31,705] Trial 86 finished with value: 2.6497694199576287 and parameters: {'num_leaves': 208, 'feature_fraction': 0.7548780782400727, 'bagging_fraction': 0.4788531890480794, 'min_child_samples': 53}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:32:21,001] Trial 87 finished with value: 2.6363283966501254 and parameters: {'num_leaves': 236, 'feature_fraction': 0.9282970226621258, 'bagging_fraction': 0.9737497558420323, 'min_child_samples': 62}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:33:05,587] Trial 88 finished with value: 2.5521712857455783 and parameters: {'num_leaves': 227, 'feature_fraction': 0.8323355389246324, 'bagging_fraction': 0.912517139252416, 'min_child_samples': 67}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:33:52,077] Trial 89 finished with value: 2.546514226864622 and parameters: {'num_leaves': 218, 'feature_fraction': 0.9757306708370591, 'bagging_fraction': 0.8414658718068432, 'min_child_samples': 70}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:34:19,735] Trial 90 finished with value: 2.5584066827424734 and parameters: {'num_leaves': 122, 'feature_fraction': 0.707822923224189, 'bagging_fraction': 0.8774508623924642, 'min_child_samples': 26}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:34:51,337] Trial 91 finished with value: 2.5438701773690617 and parameters: {'num_leaves': 116, 'feature_fraction': 0.8770356876300196, 'bagging_fraction': 0.9008334949961447, 'min_child_samples': 70}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:35:23,516] Trial 92 finished with value: 2.598774738410088 and parameters: {'num_leaves': 99, 'feature_fraction': 0.8622191475535527, 'bagging_fraction': 0.5298432145876639, 'min_child_samples': 74}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:36:02,581] Trial 93 finished with value: 2.618839232514319 and parameters: {'num_leaves': 133, 'feature_fraction': 0.8979668693890365, 'bagging_fraction': 0.9303382337077277, 'min_child_samples': 78}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:36:34,553] Trial 94 finished with value: 2.622059385357152 and parameters: {'num_leaves': 125, 'feature_fraction': 0.8206453804588881, 'bagging_fraction': 0.90119038213228, 'min_child_samples': 72}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:37:02,574] Trial 95 finished with value: 2.5990153383804784 and parameters: {'num_leaves': 112, 'feature_fraction': 0.7740675111732038, 'bagging_fraction': 0.8541592383292518, 'min_child_samples': 60}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:37:31,510] Trial 96 finished with value: 2.553627325719774 and parameters: {'num_leaves': 104, 'feature_fraction': 0.9158661428068593, 'bagging_fraction': 0.9498350752508037, 'min_child_samples': 69}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:38:07,794] Trial 97 finished with value: 2.5905058310599047 and parameters: {'num_leaves': 243, 'feature_fraction': 0.5216668775884059, 'bagging_fraction': 0.817021977659565, 'min_child_samples': 65}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:38:33,257] Trial 98 finished with value: 2.635399997978187 and parameters: {'num_leaves': 84, 'feature_fraction': 0.9500912543125988, 'bagging_fraction': 0.24522179769008418, 'min_child_samples': 75}. Best is trial 52 with value: 2.52150880697246.




[I 2024-05-20 17:38:59,640] Trial 99 finished with value: 2.647350747104285 and parameters: {'num_leaves': 88, 'feature_fraction': 0.8849754932767284, 'bagging_fraction': 0.995617931276432, 'min_child_samples': 97}. Best is trial 52 with value: 2.52150880697246.


In [11]:
best_params = study.best_trial.params
print(f'{best_params=}')

best_params={'num_leaves': 241, 'feature_fraction': 0.9173905717971508, 'bagging_fraction': 0.8850277325059562, 'min_child_samples': 70}


In [12]:
pipeline = get_pipeline(**best_params)
pipeline.fit(X_train, y_train)



In [13]:
predictions = pipeline.predict(X_test)

In [14]:
residuals = y_test - predictions

residuals.to_csv(RESIDUALS_DATA_DIR / 'residuals_lightgmb_3.csv', index=False)

In [15]:
metrics = evaluate_metrics(y_test, predictions)

for metric, value in metrics.items():
    print(f'{metric}: {value:.2f}')

MAE: 2.91
MSE: 127.62
RMSE: 11.30
SMAPE: 149.86
R: 0.96
R-squared: 0.92


In [16]:
model_name = 'LightGBM with Hyperparameter Tuning'
metrics['Model'] = model_name
save_metrics(metrics)

In [17]:
%reload_ext autoreload
%autoreload 2

In [18]:
from src.plot import plot_one_sample

In [19]:
plot_one_sample(
    example_id=16849,
    features=X_test,
    targets=y_test,
    predictions=pd.Series(predictions)
)

In [20]:
plot_one_sample(
    example_id=56700,
    features=X_test,
    targets=y_test,
    predictions=pd.Series(predictions)
)

In [21]:
max_value = predictions.max()
argmax = predictions.argmax()
print(f'Max value of the predictions: {max_value:.2f}')
print(f'Index of the max value: {argmax}')

Max value of the predictions: 522.14
Index of the max value: 16701
