In [1]:
import os
import gc
import pickle

import numpy as np
import pandas as pd
import polars as pl

from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import mean_absolute_error
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import VotingRegressor

import lightgbm as lgb

import optuna

In [2]:
class MonthlyKFold:
    def __init__(self, n_splits=3):
        self.n_splits = n_splits
        
    def split(self, X, y, groups=None):
        dates = 12 * X["year"] + X["month"]
        timesteps = sorted(dates.unique().tolist())
        X = X.reset_index()
        
        for t in timesteps[-self.n_splits:]:
            idx_train = X[dates.values < t].index
            idx_test = X[dates.values == t].index
            
            yield idx_train, idx_test
            
    def get_n_splits(self, X, y, groups=None):
        return self.n_splits

In [3]:
def feature_eng(df_data, df_client, df_gas, df_electricity, df_forecast, df_historical, df_location, df_target):
    df_data = (
        df_data
        .with_columns(
            pl.col("datetime").cast(pl.Date).alias("date"),
        )
    )
    
    df_client = (
        df_client
        .with_columns(
            (pl.col("date") + pl.duration(days=2)).cast(pl.Date)
        )
    )
    
    df_gas = (
        df_gas
        .rename({"forecast_date": "date"})
        .with_columns(
            (pl.col("date") + pl.duration(days=1)).cast(pl.Date)
        )
    )
    
    df_electricity = (
        df_electricity
        .rename({"forecast_date": "datetime"})
        .with_columns(
            pl.col("datetime") + pl.duration(days=1)
        )
    )
    
    df_location = (
        df_location
        .with_columns(
            pl.col("latitude").cast(pl.datatypes.Float32),
            pl.col("longitude").cast(pl.datatypes.Float32)
        )
    )
    
    df_forecast = (
        df_forecast
        .rename({"forecast_datetime": "datetime"})
        .with_columns(
            pl.col("latitude").cast(pl.datatypes.Float32),
            pl.col("longitude").cast(pl.datatypes.Float32),
            pl.col('datetime').dt.convert_time_zone("Europe/Bucharest").dt.replace_time_zone(None).cast(pl.Datetime("us")),
        )
        .join(df_location, how="left", on=["longitude", "latitude"])
        .drop("longitude", "latitude")
    )
    
    df_historical = (
        df_historical
        .with_columns(
            pl.col("latitude").cast(pl.datatypes.Float32),
            pl.col("longitude").cast(pl.datatypes.Float32),
            pl.col("datetime") + pl.duration(hours=37)
        )
        .join(df_location, how="left", on=["longitude", "latitude"])
        .drop("longitude", "latitude")
    )
    
    df_forecast_date = (
        df_forecast
        .group_by("datetime").mean()
        .drop("county")
    )
    
    df_forecast_local = (
        df_forecast
        .filter(pl.col("county").is_not_null())
        .group_by("county", "datetime").mean()
    )
    
    df_historical_date = (
        df_historical
        .group_by("datetime").mean()
        .drop("county")
    )
    
    df_historical_local = (
        df_historical
        .filter(pl.col("county").is_not_null())
        .group_by("county", "datetime").mean()
    )
    
    df_data = (
        df_data
        .join(df_gas, on="date", how="left")
        .join(df_client, on=["county", "is_business", "product_type", "date"], how="left")
        .join(df_electricity, on="datetime", how="left")
        
        .join(df_forecast_date, on="datetime", how="left", suffix="_fd")
        .join(df_forecast_local, on=["county", "datetime"], how="left", suffix="_fl")
        .join(df_historical_date, on="datetime", how="left", suffix="_hd")
        .join(df_historical_local, on=["county", "datetime"], how="left", suffix="_hl")
        
        .join(df_forecast_date.with_columns(pl.col("datetime") + pl.duration(days=7)), on="datetime", how="left", suffix="_fdw")
        .join(df_forecast_local.with_columns(pl.col("datetime") + pl.duration(days=7)), on=["county", "datetime"], how="left", suffix="_flw")
        .join(df_historical_date.with_columns(pl.col("datetime") + pl.duration(days=7)), on="datetime", how="left", suffix="_hdw")
        .join(df_historical_local.with_columns(pl.col("datetime") + pl.duration(days=7)), on=["county", "datetime"], how="left", suffix="_hlw")
        
        .join(df_target.with_columns(pl.col("datetime") + pl.duration(days=2)).rename({"target": "target_1"}), on=["county", "is_business", "product_type", "is_consumption", "datetime"], how="left")
        .join(df_target.with_columns(pl.col("datetime") + pl.duration(days=3)).rename({"target": "target_2"}), on=["county", "is_business", "product_type", "is_consumption", "datetime"], how="left")
        .join(df_target.with_columns(pl.col("datetime") + pl.duration(days=4)).rename({"target": "target_3"}), on=["county", "is_business", "product_type", "is_consumption", "datetime"], how="left")
        .join(df_target.with_columns(pl.col("datetime") + pl.duration(days=5)).rename({"target": "target_4"}), on=["county", "is_business", "product_type", "is_consumption", "datetime"], how="left")
        .join(df_target.with_columns(pl.col("datetime") + pl.duration(days=6)).rename({"target": "target_5"}), on=["county", "is_business", "product_type", "is_consumption", "datetime"], how="left")
        .join(df_target.with_columns(pl.col("datetime") + pl.duration(days=7)).rename({"target": "target_6"}), on=["county", "is_business", "product_type", "is_consumption", "datetime"], how="left")
        .join(df_target.with_columns(pl.col("datetime") + pl.duration(days=14)).rename({"target": "target_7"}), on=["county", "is_business", "product_type", "is_consumption", "datetime"], how="left")
        
        .with_columns(
            pl.col("datetime").dt.ordinal_day().alias("dayofyear"),
            pl.col("datetime").dt.hour().alias("hour"),
            pl.col("datetime").dt.day().alias("day"),
            pl.col("datetime").dt.weekday().alias("weekday"),
            pl.col("datetime").dt.month().alias("month"),
            pl.col("datetime").dt.year().alias("year"),
        )
        
        .with_columns(
            pl.concat_str("county", "is_business", "product_type", "is_consumption", separator="_").alias("category_1"),
        )
        
        .with_columns(
            (np.pi * pl.col("dayofyear") / 183).sin().alias("sin(dayofyear)"),
            (np.pi * pl.col("dayofyear") / 183).cos().alias("cos(dayofyear)"),
            (np.pi * pl.col("hour") / 12).sin().alias("sin(hour)"),
            (np.pi * pl.col("hour") / 12).cos().alias("cos(hour)"),
        )
        
        .with_columns(
            pl.col(pl.Float64).cast(pl.Float32),
        )
        
        .drop("date", "datetime", "hour", "dayofyear")
    )
    
    return df_data

In [4]:
def to_pandas(X, y=None):
    cat_cols = ["county", "is_business", "product_type", "is_consumption", "category_1"]
    
    if y is not None:
        df = pd.concat([X.to_pandas(), y.to_pandas()], axis=1)
    else:
        df = X.to_pandas()    
    
    df = df.set_index("row_id")
    df[cat_cols] = df[cat_cols].astype("category")
    
    df["target_mean"] = df[[f"target_{i}" for i in range(1, 7)]].mean(1)
    df["target_std"] = df[[f"target_{i}" for i in range(1, 7)]].std(1)
    df["target_ratio"] = df["target_6"] / (df["target_7"] + 1e-3)
    
    return df

In [5]:
#カスタム目的関数の実装
#Fair関数
def Fair(preds, dtrain):
    x = preds-dtrain.labels()
    c = 1.0
    den = abs(x)+c
    grad = c*x/den
    hess =c*c/den**2
    
    return grad, hess

# Psuedo_Hunber関数
def psuedo_huber(preds, dtrain):
    d = preds-dtrain.labels()
    delta = 1.0
    scale = 1+(d/delta)**2
    scale_sqrt = np.sqrt(scale)
    grad = d/scale_sqrt
    hess = 1/scale/scale_sqrt
    return grad, hess
    
    


In [6]:
def lgb_objective(trial):
    params = {
        'n_iter'           : 1000,
        'verbose'          : -1,
        'random_state'     : 42,
        'learning_rate'    : trial.suggest_float('learning_rate', 0.01, 0.1),
        'colsample_bytree' : trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'colsample_bynode' : trial.suggest_float('colsample_bynode', 0.5, 1.0),
        'lambda_l1'        : trial.suggest_float('lambda_l1', 1e-2, 10.0),
        'lambda_l2'        : trial.suggest_float('lambda_l2', 1e-2, 10.0),
        'min_data_in_leaf' : trial.suggest_int('min_data_in_leaf', 4, 256),
        'max_depth'        : trial.suggest_int('max_depth', 5, 10),
        'max_bin'          : trial.suggest_int('max_bin', 32, 1024),
    }
    
    # 損失関数を選択
    objective_choice = trial.suggest_categorical('objective', ['regression', 'regression_l1', 'huber', 'fair'])
    params['objective'] = objective_choice
    
    model  = lgb.LGBMRegressor(**params)
    X, y   = df_train.drop(columns=["target"]), df_train["target"]
    cv     = MonthlyKFold(1)
    scores = cross_val_score(model, X, y, cv=cv, scoring='neg_mean_absolute_error')
    
    return -1 * np.mean(scores)

### Global Variables

In [7]:
root = "/Users/sekishunfutoshi/Desktop/kaggle/enefit/input/predict-energy-behavior-of-prosumers"

data_cols        = ['target', 'county', 'is_business', 'product_type', 'is_consumption', 'datetime', 'row_id']
client_cols      = ['product_type', 'county', 'eic_count', 'installed_capacity', 'is_business', 'date']
gas_cols         = ['forecast_date', 'lowest_price_per_mwh', 'highest_price_per_mwh']
electricity_cols = ['forecast_date', 'euros_per_mwh']
forecast_cols    = ['latitude', 'longitude', 'hours_ahead', 'temperature', 'dewpoint', 'cloudcover_high', 'cloudcover_low', 'cloudcover_mid', 'cloudcover_total', '10_metre_u_wind_component', '10_metre_v_wind_component', 'forecast_datetime', 'direct_solar_radiation', 'surface_solar_radiation_downwards', 'snowfall', 'total_precipitation']
historical_cols  = ['datetime', 'temperature', 'dewpoint', 'rain', 'snowfall', 'surface_pressure','cloudcover_total','cloudcover_low','cloudcover_mid','cloudcover_high','windspeed_10m','winddirection_10m','shortwave_radiation','direct_solar_radiation','diffuse_radiation','latitude','longitude']
location_cols    = ['longitude', 'latitude', 'county']
target_cols      = ['target', 'county', 'is_business', 'product_type', 'is_consumption', 'datetime']

save_path = None
load_path = None

### Data I/O

In [8]:
df_data        = pl.read_csv(os.path.join(root, "train.csv"), columns=data_cols, try_parse_dates=True)
df_client      = pl.read_csv(os.path.join(root, "client.csv"), columns=client_cols, try_parse_dates=True)
df_gas         = pl.read_csv(os.path.join(root, "gas_prices.csv"), columns=gas_cols, try_parse_dates=True)
df_electricity = pl.read_csv(os.path.join(root, "electricity_prices.csv"), columns=electricity_cols, try_parse_dates=True)
df_forecast    = pl.read_csv(os.path.join(root, "forecast_weather.csv"), columns=forecast_cols, try_parse_dates=True)
df_historical  = pl.read_csv(os.path.join(root, "historical_weather.csv"), columns=historical_cols, try_parse_dates=True)
df_location    = pl.read_csv(os.path.join(root, "weather_station_to_county_mapping.csv"), columns=location_cols, try_parse_dates=True)
df_target      = df_data.select(target_cols)

schema_data        = df_data.schema
schema_client      = df_client.schema
schema_gas         = df_gas.schema
schema_electricity = df_electricity.schema
schema_forecast    = df_forecast.schema
schema_historical  = df_historical.schema
schema_target      = df_target.schema

### Feature Engineering

In [9]:
X, y = df_data.drop("target"), df_data.select("target")

X = feature_eng(X, df_client, df_gas, df_electricity, df_forecast, df_historical, df_location, df_target)

df_train = to_pandas(X, y)

In [10]:
df_train = df_train[df_train["target"].notnull() & df_train["year"].gt(2021)]

In [11]:
df_train.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Index: 1651902 entries, 366048 to 2018351
Data columns (total 137 columns):
 #    Column                                 Dtype   
---   ------                                 -----   
 0    county                                 category
 1    is_business                            category
 2    product_type                           category
 3    is_consumption                         category
 4    lowest_price_per_mwh                   float32 
 5    highest_price_per_mwh                  float32 
 6    eic_count                              float64 
 7    installed_capacity                     float32 
 8    euros_per_mwh                          float32 
 9    hours_ahead                            float32 
 10   temperature                            float32 
 11   dewpoint                               float32 
 12   cloudcover_high                        float32 
 13   cloudcover_low                         float32 
 14   cloudcover_mid  

### HyperParam Optimization

In [12]:
study = optuna.create_study(direction='minimize', study_name='Regressor')
study.optimize(lgb_objective, n_trials=100, show_progress_bar=True)
best_params = study.best_params
print("Best Params:", best_params)

[I 2023-12-19 21:30:30,018] A new study created in memory with name: Regressor


  0%|          | 0/100 [00:00<?, ?it/s]



[I 2023-12-19 21:32:01,176] Trial 0 finished with value: 76.48091958419452 and parameters: {'learning_rate': 0.06465059072241587, 'colsample_bytree': 0.8748229841353639, 'colsample_bynode': 0.7108665539079917, 'lambda_l1': 4.0797071179005515, 'lambda_l2': 7.454904240186609, 'min_data_in_leaf': 88, 'max_depth': 9, 'max_bin': 63, 'objective': 'regression_l1'}. Best is trial 0 with value: 76.48091958419452.




[I 2023-12-19 21:33:07,507] Trial 1 finished with value: 69.5552808406666 and parameters: {'learning_rate': 0.02219172909504387, 'colsample_bytree': 0.7554172950022503, 'colsample_bynode': 0.9880321290172285, 'lambda_l1': 8.353236959983276, 'lambda_l2': 1.4377678302467545, 'min_data_in_leaf': 112, 'max_depth': 7, 'max_bin': 52, 'objective': 'regression'}. Best is trial 1 with value: 69.5552808406666.




[I 2023-12-19 21:34:30,531] Trial 2 finished with value: 65.40307941067593 and parameters: {'learning_rate': 0.09989744572126025, 'colsample_bytree': 0.6121522255874319, 'colsample_bynode': 0.6906084812223319, 'lambda_l1': 2.885467722168561, 'lambda_l2': 3.883115003529118, 'min_data_in_leaf': 144, 'max_depth': 7, 'max_bin': 984, 'objective': 'regression'}. Best is trial 2 with value: 65.40307941067593.




[I 2023-12-19 21:36:18,283] Trial 3 finished with value: 367.33986826391003 and parameters: {'learning_rate': 0.06391453610188177, 'colsample_bytree': 0.6382778855972657, 'colsample_bynode': 0.7138064876223513, 'lambda_l1': 0.6259583226675436, 'lambda_l2': 4.329948934350033, 'min_data_in_leaf': 246, 'max_depth': 10, 'max_bin': 556, 'objective': 'huber'}. Best is trial 2 with value: 65.40307941067593.




[I 2023-12-19 21:37:25,265] Trial 4 finished with value: 63.62048636081667 and parameters: {'learning_rate': 0.08665276668423388, 'colsample_bytree': 0.6595331374769489, 'colsample_bynode': 0.6633697537681054, 'lambda_l1': 2.4917053349053955, 'lambda_l2': 2.092600517890021, 'min_data_in_leaf': 247, 'max_depth': 9, 'max_bin': 288, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:39:22,373] Trial 5 finished with value: 78.7836864639944 and parameters: {'learning_rate': 0.01981782572118751, 'colsample_bytree': 0.5438522919530302, 'colsample_bynode': 0.8188020685414101, 'lambda_l1': 0.7950218842628548, 'lambda_l2': 8.236710470019885, 'min_data_in_leaf': 13, 'max_depth': 9, 'max_bin': 910, 'objective': 'regression_l1'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:41:31,065] Trial 6 finished with value: 69.54646206777048 and parameters: {'learning_rate': 0.08012760298491657, 'colsample_bytree': 0.6072062966264754, 'colsample_bynode': 0.5897300235295948, 'lambda_l1': 7.42576211368013, 'lambda_l2': 2.6386615139289766, 'min_data_in_leaf': 198, 'max_depth': 8, 'max_bin': 867, 'objective': 'regression_l1'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:43:19,882] Trial 7 finished with value: 378.41616480739714 and parameters: {'learning_rate': 0.049058513474590094, 'colsample_bytree': 0.6247288352393372, 'colsample_bynode': 0.5833384971119546, 'lambda_l1': 4.172596549039515, 'lambda_l2': 7.65445679033524, 'min_data_in_leaf': 241, 'max_depth': 10, 'max_bin': 579, 'objective': 'huber'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:44:29,265] Trial 8 finished with value: 67.81587936789185 and parameters: {'learning_rate': 0.035144301820152046, 'colsample_bytree': 0.7589321571671312, 'colsample_bynode': 0.9514384613418655, 'lambda_l1': 2.6579149750068187, 'lambda_l2': 1.8312836659733998, 'min_data_in_leaf': 74, 'max_depth': 5, 'max_bin': 299, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:45:42,729] Trial 9 finished with value: 68.15329174515344 and parameters: {'learning_rate': 0.03764733885043298, 'colsample_bytree': 0.8585245942672063, 'colsample_bynode': 0.7055628080031828, 'lambda_l1': 9.464543934565256, 'lambda_l2': 8.196507943828655, 'min_data_in_leaf': 244, 'max_depth': 5, 'max_bin': 489, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:47:03,554] Trial 10 finished with value: 4887.807039040603 and parameters: {'learning_rate': 0.09996052200131161, 'colsample_bytree': 0.9825100338820962, 'colsample_bynode': 0.5080509339818833, 'lambda_l1': 6.207585286901207, 'lambda_l2': 0.3051335939602433, 'min_data_in_leaf': 169, 'max_depth': 8, 'max_bin': 271, 'objective': 'fair'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:48:07,001] Trial 11 finished with value: 64.98110296573678 and parameters: {'learning_rate': 0.09914359797065074, 'colsample_bytree': 0.5022983030203444, 'colsample_bynode': 0.7985379925620711, 'lambda_l1': 2.583368018521911, 'lambda_l2': 3.6871219058058133, 'min_data_in_leaf': 163, 'max_depth': 7, 'max_bin': 756, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:49:12,480] Trial 12 finished with value: 65.24207051410292 and parameters: {'learning_rate': 0.08226877214236557, 'colsample_bytree': 0.5142360094644401, 'colsample_bynode': 0.8123778336517407, 'lambda_l1': 2.1421234755398313, 'lambda_l2': 5.215836492320907, 'min_data_in_leaf': 177, 'max_depth': 6, 'max_bin': 751, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:50:19,496] Trial 13 finished with value: 359.76480099138473 and parameters: {'learning_rate': 0.08743909107903082, 'colsample_bytree': 0.5154019237356368, 'colsample_bynode': 0.8261602208550946, 'lambda_l1': 5.241025102211335, 'lambda_l2': 3.08405553433385, 'min_data_in_leaf': 209, 'max_depth': 9, 'max_bin': 328, 'objective': 'fair'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:51:40,635] Trial 14 finished with value: 64.92338181097799 and parameters: {'learning_rate': 0.0889141842059998, 'colsample_bytree': 0.6698243452264915, 'colsample_bynode': 0.7726252811502977, 'lambda_l1': 1.529327834929103, 'lambda_l2': 5.529738437670693, 'min_data_in_leaf': 150, 'max_depth': 6, 'max_bin': 726, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:52:53,779] Trial 15 finished with value: 64.6992887257198 and parameters: {'learning_rate': 0.07047150949129645, 'colsample_bytree': 0.7080188486843904, 'colsample_bynode': 0.6241677423846607, 'lambda_l1': 0.09304450207105108, 'lambda_l2': 5.4810874443837845, 'min_data_in_leaf': 29, 'max_depth': 6, 'max_bin': 439, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:54:04,558] Trial 16 finished with value: 65.11028290904702 and parameters: {'learning_rate': 0.0698032170979479, 'colsample_bytree': 0.710399887615521, 'colsample_bynode': 0.6412141249048706, 'lambda_l1': 1.3251919889664439, 'lambda_l2': 9.936639573930316, 'min_data_in_leaf': 7, 'max_depth': 6, 'max_bin': 401, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:55:33,039] Trial 17 finished with value: 359.0350087154837 and parameters: {'learning_rate': 0.07532291960313897, 'colsample_bytree': 0.6974424929448676, 'colsample_bynode': 0.6192211532447492, 'lambda_l1': 0.09242018558568077, 'lambda_l2': 5.914405953676937, 'min_data_in_leaf': 40, 'max_depth': 8, 'max_bin': 191, 'objective': 'huber'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:56:50,898] Trial 18 finished with value: 33968.03675091853 and parameters: {'learning_rate': 0.0609314964785552, 'colsample_bytree': 0.7798983856698113, 'colsample_bynode': 0.5343022495619539, 'lambda_l1': 0.010042464363004933, 'lambda_l2': 0.020430954765630283, 'min_data_in_leaf': 63, 'max_depth': 9, 'max_bin': 176, 'objective': 'fair'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:57:57,358] Trial 19 finished with value: 65.03769372316803 and parameters: {'learning_rate': 0.0736891350520276, 'colsample_bytree': 0.5707607953542075, 'colsample_bynode': 0.6541288616015215, 'lambda_l1': 1.5668815028546352, 'lambda_l2': 4.443829958052463, 'min_data_in_leaf': 112, 'max_depth': 6, 'max_bin': 451, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 21:59:17,101] Trial 20 finished with value: 65.74102445543663 and parameters: {'learning_rate': 0.05362410086253105, 'colsample_bytree': 0.6675050863376499, 'colsample_bynode': 0.5598920222639564, 'lambda_l1': 3.616590015491389, 'lambda_l2': 6.162433575455978, 'min_data_in_leaf': 40, 'max_depth': 5, 'max_bin': 641, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:00:39,522] Trial 21 finished with value: 64.4688220669771 and parameters: {'learning_rate': 0.08765356877358418, 'colsample_bytree': 0.6836364683507862, 'colsample_bynode': 0.7570951867990051, 'lambda_l1': 1.3380471170198178, 'lambda_l2': 4.8846905544374035, 'min_data_in_leaf': 212, 'max_depth': 6, 'max_bin': 696, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:01:53,951] Trial 22 finished with value: 65.5783024127456 and parameters: {'learning_rate': 0.08789637216907932, 'colsample_bytree': 0.7113842245281656, 'colsample_bynode': 0.6604734521783008, 'lambda_l1': 1.0598152675877688, 'lambda_l2': 4.507752030657134, 'min_data_in_leaf': 214, 'max_depth': 7, 'max_bin': 414, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:03:05,713] Trial 23 finished with value: 65.88917936805008 and parameters: {'learning_rate': 0.07833935222747064, 'colsample_bytree': 0.5851501876876875, 'colsample_bynode': 0.6152245072226018, 'lambda_l1': 1.8203574087178414, 'lambda_l2': 3.3029268773567297, 'min_data_in_leaf': 220, 'max_depth': 6, 'max_bin': 615, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:04:18,350] Trial 24 finished with value: 66.8952446175632 and parameters: {'learning_rate': 0.07191836073151314, 'colsample_bytree': 0.6584527646452828, 'colsample_bynode': 0.7542314706304432, 'lambda_l1': 0.6884067638811844, 'lambda_l2': 2.3423343876893203, 'min_data_in_leaf': 191, 'max_depth': 5, 'max_bin': 356, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:05:24,788] Trial 25 finished with value: 64.54792910114615 and parameters: {'learning_rate': 0.08998257646285214, 'colsample_bytree': 0.7189984465838714, 'colsample_bynode': 0.6773130850870048, 'lambda_l1': 2.2033565036443026, 'lambda_l2': 6.4869822192040285, 'min_data_in_leaf': 229, 'max_depth': 8, 'max_bin': 205, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:06:54,471] Trial 26 finished with value: 347.78177757433684 and parameters: {'learning_rate': 0.09114275743070924, 'colsample_bytree': 0.6577151809492199, 'colsample_bynode': 0.6766312188746019, 'lambda_l1': 3.345478130519953, 'lambda_l2': 6.637088288594086, 'min_data_in_leaf': 226, 'max_depth': 8, 'max_bin': 191, 'objective': 'huber'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:08:16,047] Trial 27 finished with value: 323.38962685783844 and parameters: {'learning_rate': 0.0928048735930003, 'colsample_bytree': 0.576517284668607, 'colsample_bynode': 0.7482932435005811, 'lambda_l1': 2.5586014780014854, 'lambda_l2': 4.821559242033275, 'min_data_in_leaf': 252, 'max_depth': 10, 'max_bin': 122, 'objective': 'fair'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:10:04,750] Trial 28 finished with value: 69.51722302057333 and parameters: {'learning_rate': 0.08084521379108114, 'colsample_bytree': 0.7948868504539878, 'colsample_bynode': 0.7277721444175821, 'lambda_l1': 1.6785149207707377, 'lambda_l2': 3.8823455444317014, 'min_data_in_leaf': 229, 'max_depth': 9, 'max_bin': 249, 'objective': 'regression_l1'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:11:41,646] Trial 29 finished with value: 74.75156144941465 and parameters: {'learning_rate': 0.0836265703394914, 'colsample_bytree': 0.7287158643496555, 'colsample_bynode': 0.6797847580865383, 'lambda_l1': 4.179474847131868, 'lambda_l2': 6.605820475522572, 'min_data_in_leaf': 195, 'max_depth': 8, 'max_bin': 83, 'objective': 'regression_l1'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:13:04,010] Trial 30 finished with value: 65.15901504654353 and parameters: {'learning_rate': 0.09459647190610325, 'colsample_bytree': 0.8056571153351815, 'colsample_bynode': 0.7246831129859409, 'lambda_l1': 2.110627629560476, 'lambda_l2': 1.2276390936421326, 'min_data_in_leaf': 255, 'max_depth': 9, 'max_bin': 689, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:14:19,476] Trial 31 finished with value: 64.30071065977593 and parameters: {'learning_rate': 0.08573153068301223, 'colsample_bytree': 0.6889196237425841, 'colsample_bynode': 0.640679015530436, 'lambda_l1': 1.1281779007889294, 'lambda_l2': 5.027895935040209, 'min_data_in_leaf': 103, 'max_depth': 7, 'max_bin': 492, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:15:36,293] Trial 32 finished with value: 63.99307078151874 and parameters: {'learning_rate': 0.08448852170660137, 'colsample_bytree': 0.6881331551904751, 'colsample_bynode': 0.6527719119272225, 'lambda_l1': 3.144099858733459, 'lambda_l2': 5.090824607748708, 'min_data_in_leaf': 117, 'max_depth': 7, 'max_bin': 513, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:16:54,298] Trial 33 finished with value: 64.71263133978357 and parameters: {'learning_rate': 0.0834025523423895, 'colsample_bytree': 0.6754386959070204, 'colsample_bynode': 0.6423075144322121, 'lambda_l1': 3.190013731808804, 'lambda_l2': 4.883125739985398, 'min_data_in_leaf': 113, 'max_depth': 7, 'max_bin': 521, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:18:14,509] Trial 34 finished with value: 64.95813337108181 and parameters: {'learning_rate': 0.07807500703134605, 'colsample_bytree': 0.630489672332168, 'colsample_bynode': 0.6966706361443425, 'lambda_l1': 2.9325954258903235, 'lambda_l2': 3.620580619406101, 'min_data_in_leaf': 123, 'max_depth': 7, 'max_bin': 663, 'objective': 'regression'}. Best is trial 4 with value: 63.62048636081667.




[I 2023-12-19 22:19:30,532] Trial 35 finished with value: 63.504955337498856 and parameters: {'learning_rate': 0.09510698684492003, 'colsample_bytree': 0.7478177759741577, 'colsample_bynode': 0.6051862690932498, 'lambda_l1': 1.0480954906713513, 'lambda_l2': 4.060025634323563, 'min_data_in_leaf': 87, 'max_depth': 7, 'max_bin': 513, 'objective': 'regression'}. Best is trial 35 with value: 63.504955337498856.




[I 2023-12-19 22:20:47,571] Trial 36 finished with value: 64.59269464994475 and parameters: {'learning_rate': 0.09554762500975367, 'colsample_bytree': 0.7517882777695277, 'colsample_bynode': 0.5826978527569869, 'lambda_l1': 0.9287271776267381, 'lambda_l2': 4.23145849264013, 'min_data_in_leaf': 81, 'max_depth': 7, 'max_bin': 505, 'objective': 'regression'}. Best is trial 35 with value: 63.504955337498856.




[I 2023-12-19 22:22:45,499] Trial 37 finished with value: 344.3375344865692 and parameters: {'learning_rate': 0.09597814622966637, 'colsample_bytree': 0.738767296953317, 'colsample_bynode': 0.5889507728592089, 'lambda_l1': 3.7189149418431384, 'lambda_l2': 2.7569045341662015, 'min_data_in_leaf': 99, 'max_depth': 7, 'max_bin': 572, 'objective': 'huber'}. Best is trial 35 with value: 63.504955337498856.




[I 2023-12-19 22:24:47,965] Trial 38 finished with value: 74.49756948238199 and parameters: {'learning_rate': 0.06665536974091525, 'colsample_bytree': 0.6315440473864886, 'colsample_bynode': 0.6097644829890123, 'lambda_l1': 0.7675141492269987, 'lambda_l2': 3.3406518131425704, 'min_data_in_leaf': 134, 'max_depth': 7, 'max_bin': 477, 'objective': 'regression_l1'}. Best is trial 35 with value: 63.504955337498856.




[I 2023-12-19 22:25:56,913] Trial 39 finished with value: 64.35693358996541 and parameters: {'learning_rate': 0.07702006652050068, 'colsample_bytree': 0.6042799022004889, 'colsample_bynode': 0.6457060643108761, 'lambda_l1': 2.0188230846565225, 'lambda_l2': 4.272167360016148, 'min_data_in_leaf': 66, 'max_depth': 8, 'max_bin': 359, 'objective': 'regression'}. Best is trial 35 with value: 63.504955337498856.




[I 2023-12-19 22:27:20,333] Trial 40 finished with value: 64.018961474287 and parameters: {'learning_rate': 0.08457405198641046, 'colsample_bytree': 0.6823079753341729, 'colsample_bynode': 0.5576434311145527, 'lambda_l1': 2.900573943905714, 'lambda_l2': 2.3006369823430015, 'min_data_in_leaf': 96, 'max_depth': 10, 'max_bin': 795, 'objective': 'regression'}. Best is trial 35 with value: 63.504955337498856.




[I 2023-12-19 22:28:39,951] Trial 41 finished with value: 63.870181855426324 and parameters: {'learning_rate': 0.08465758377838609, 'colsample_bytree': 0.6941897940442853, 'colsample_bynode': 0.5569529126759959, 'lambda_l1': 2.693593005418953, 'lambda_l2': 2.16191102882181, 'min_data_in_leaf': 89, 'max_depth': 7, 'max_bin': 601, 'objective': 'regression'}. Best is trial 35 with value: 63.504955337498856.




[I 2023-12-19 22:30:03,746] Trial 42 finished with value: 63.13380131546242 and parameters: {'learning_rate': 0.08407572243148756, 'colsample_bytree': 0.7391133421728154, 'colsample_bynode': 0.5579200138377782, 'lambda_l1': 2.8319526646000215, 'lambda_l2': 2.1251707803222413, 'min_data_in_leaf': 90, 'max_depth': 10, 'max_bin': 800, 'objective': 'regression'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:31:31,990] Trial 43 finished with value: 63.47056266959869 and parameters: {'learning_rate': 0.09207114465717421, 'colsample_bytree': 0.7391548944312686, 'colsample_bynode': 0.5016300416244076, 'lambda_l1': 2.46014979665485, 'lambda_l2': 2.909898700690464, 'min_data_in_leaf': 87, 'max_depth': 10, 'max_bin': 1017, 'objective': 'regression'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:32:59,649] Trial 44 finished with value: 63.69656246935248 and parameters: {'learning_rate': 0.09903543949989467, 'colsample_bytree': 0.7644304170417635, 'colsample_bynode': 0.5185430927211465, 'lambda_l1': 2.486889154474128, 'lambda_l2': 1.817127798476811, 'min_data_in_leaf': 85, 'max_depth': 10, 'max_bin': 1021, 'objective': 'regression'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:34:27,725] Trial 45 finished with value: 63.51811231871753 and parameters: {'learning_rate': 0.0997403994512108, 'colsample_bytree': 0.7673815437789693, 'colsample_bynode': 0.5320506611130111, 'lambda_l1': 2.436940519914127, 'lambda_l2': 1.6156729340218852, 'min_data_in_leaf': 51, 'max_depth': 10, 'max_bin': 1024, 'objective': 'regression'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:36:15,614] Trial 46 finished with value: 1143.7394874610998 and parameters: {'learning_rate': 0.09260393970606615, 'colsample_bytree': 0.7451320312989672, 'colsample_bynode': 0.5012174494256767, 'lambda_l1': 4.33399002862003, 'lambda_l2': 1.168149638448035, 'min_data_in_leaf': 56, 'max_depth': 10, 'max_bin': 925, 'objective': 'fair'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:37:41,029] Trial 47 finished with value: 63.59660667785202 and parameters: {'learning_rate': 0.09998864592233234, 'colsample_bytree': 0.830212856219038, 'colsample_bynode': 0.5365109704663698, 'lambda_l1': 1.798621810871552, 'lambda_l2': 2.882208223007628, 'min_data_in_leaf': 51, 'max_depth': 10, 'max_bin': 855, 'objective': 'regression'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:39:55,105] Trial 48 finished with value: 341.53974689616297 and parameters: {'learning_rate': 0.09989155209210103, 'colsample_bytree': 0.8072297080488117, 'colsample_bynode': 0.5274837278150842, 'lambda_l1': 1.7416077854599128, 'lambda_l2': 2.8629948659785764, 'min_data_in_leaf': 48, 'max_depth': 10, 'max_bin': 844, 'objective': 'huber'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:41:28,660] Trial 49 finished with value: 64.22358404277439 and parameters: {'learning_rate': 0.09610872950051673, 'colsample_bytree': 0.8265425243875767, 'colsample_bynode': 0.5376051636789537, 'lambda_l1': 2.395611139986629, 'lambda_l2': 2.8196886079778034, 'min_data_in_leaf': 17, 'max_depth': 10, 'max_bin': 966, 'objective': 'regression'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:43:50,536] Trial 50 finished with value: 72.11794942832638 and parameters: {'learning_rate': 0.09070223620412843, 'colsample_bytree': 0.7712430295508459, 'colsample_bynode': 0.5089013862953049, 'lambda_l1': 0.48210788761314616, 'lambda_l2': 1.836614059543592, 'min_data_in_leaf': 66, 'max_depth': 9, 'max_bin': 871, 'objective': 'regression_l1'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:45:21,978] Trial 51 finished with value: 64.36988909085599 and parameters: {'learning_rate': 0.09385701277254216, 'colsample_bytree': 0.7385580676486105, 'colsample_bynode': 0.5438760043164261, 'lambda_l1': 1.3611594419399273, 'lambda_l2': 3.1819486079387254, 'min_data_in_leaf': 77, 'max_depth': 10, 'max_bin': 937, 'objective': 'regression'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:47:00,194] Trial 52 finished with value: 64.06118270961285 and parameters: {'learning_rate': 0.09745253030114329, 'colsample_bytree': 0.8358770811265985, 'colsample_bynode': 0.568867872442882, 'lambda_l1': 2.0810632058937957, 'lambda_l2': 0.8061678786830413, 'min_data_in_leaf': 30, 'max_depth': 10, 'max_bin': 1008, 'objective': 'regression'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:48:32,377] Trial 53 finished with value: 64.30899197556597 and parameters: {'learning_rate': 0.09099293787201079, 'colsample_bytree': 0.778994885451101, 'colsample_bynode': 0.5245091866848565, 'lambda_l1': 2.369174785495941, 'lambda_l2': 2.497686906914109, 'min_data_in_leaf': 53, 'max_depth': 9, 'max_bin': 817, 'objective': 'regression'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:50:09,989] Trial 54 finished with value: 63.40221404656488 and parameters: {'learning_rate': 0.0965019636789947, 'colsample_bytree': 0.7570690314480844, 'colsample_bynode': 0.599237179639833, 'lambda_l1': 1.744475759928698, 'lambda_l2': 1.7479147733852187, 'min_data_in_leaf': 143, 'max_depth': 10, 'max_bin': 888, 'objective': 'regression'}. Best is trial 42 with value: 63.13380131546242.




[I 2023-12-19 22:51:41,843] Trial 55 finished with value: 62.72462398036485 and parameters: {'learning_rate': 0.09625425976298063, 'colsample_bytree': 0.7588246004819929, 'colsample_bynode': 0.5429950754414783, 'lambda_l1': 1.5285663331501915, 'lambda_l2': 1.7284311160327857, 'min_data_in_leaf': 141, 'max_depth': 10, 'max_bin': 884, 'objective': 'regression'}. Best is trial 55 with value: 62.72462398036485.




[I 2023-12-19 22:53:14,090] Trial 56 finished with value: 64.44924748484848 and parameters: {'learning_rate': 0.09544710631659131, 'colsample_bytree': 0.7589894177589954, 'colsample_bynode': 0.571718423848724, 'lambda_l1': 1.269104401057605, 'lambda_l2': 1.6941728497793107, 'min_data_in_leaf': 155, 'max_depth': 10, 'max_bin': 896, 'objective': 'regression'}. Best is trial 55 with value: 62.72462398036485.




[I 2023-12-19 22:54:48,013] Trial 57 finished with value: 63.04567848317379 and parameters: {'learning_rate': 0.08842466494635, 'colsample_bytree': 0.7325523936447261, 'colsample_bynode': 0.5935451802367007, 'lambda_l1': 1.6852509232926676, 'lambda_l2': 1.5274956579895427, 'min_data_in_leaf': 142, 'max_depth': 10, 'max_bin': 975, 'objective': 'regression'}. Best is trial 55 with value: 62.72462398036485.




[I 2023-12-19 22:56:36,143] Trial 58 finished with value: 653.5256609363119 and parameters: {'learning_rate': 0.08865572962977146, 'colsample_bytree': 0.730939645780211, 'colsample_bynode': 0.5979457087470924, 'lambda_l1': 0.4311929411119708, 'lambda_l2': 2.1159563808322326, 'min_data_in_leaf': 142, 'max_depth': 9, 'max_bin': 975, 'objective': 'fair'}. Best is trial 55 with value: 62.72462398036485.




[I 2023-12-19 22:58:05,888] Trial 59 finished with value: 63.590466493281255 and parameters: {'learning_rate': 0.08854713981739676, 'colsample_bytree': 0.7213750040015586, 'colsample_bynode': 0.6047219492123272, 'lambda_l1': 1.6503202367085297, 'lambda_l2': 0.8768734522642676, 'min_data_in_leaf': 132, 'max_depth': 10, 'max_bin': 782, 'objective': 'regression'}. Best is trial 55 with value: 62.72462398036485.




[I 2023-12-19 22:59:40,931] Trial 60 finished with value: 63.13867496743438 and parameters: {'learning_rate': 0.07975934526268831, 'colsample_bytree': 0.7059782846108552, 'colsample_bynode': 0.5508299350911939, 'lambda_l1': 0.9751341450780692, 'lambda_l2': 2.545319192066564, 'min_data_in_leaf': 167, 'max_depth': 9, 'max_bin': 952, 'objective': 'regression'}. Best is trial 55 with value: 62.72462398036485.




[I 2023-12-19 23:01:15,215] Trial 61 finished with value: 63.28909578851369 and parameters: {'learning_rate': 0.08111130397298766, 'colsample_bytree': 0.7081568686332064, 'colsample_bynode': 0.5854511494809272, 'lambda_l1': 1.0424092038622366, 'lambda_l2': 2.3783732657193344, 'min_data_in_leaf': 177, 'max_depth': 9, 'max_bin': 936, 'objective': 'regression'}. Best is trial 55 with value: 62.72462398036485.




[I 2023-12-19 23:02:49,503] Trial 62 finished with value: 63.077344335978545 and parameters: {'learning_rate': 0.0808458500421793, 'colsample_bytree': 0.7135900058552546, 'colsample_bynode': 0.5513009892023827, 'lambda_l1': 1.4266609229695022, 'lambda_l2': 2.429822539589743, 'min_data_in_leaf': 175, 'max_depth': 9, 'max_bin': 954, 'objective': 'regression'}. Best is trial 55 with value: 62.72462398036485.




[I 2023-12-19 23:04:23,868] Trial 63 finished with value: 62.61718036113329 and parameters: {'learning_rate': 0.08072727696049717, 'colsample_bytree': 0.7107613501491252, 'colsample_bynode': 0.579127562270319, 'lambda_l1': 0.3688343201590739, 'lambda_l2': 2.5405843112295075, 'min_data_in_leaf': 179, 'max_depth': 9, 'max_bin': 944, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:05:58,047] Trial 64 finished with value: 63.77696909599852 and parameters: {'learning_rate': 0.08056094365509803, 'colsample_bytree': 0.7070601880410032, 'colsample_bynode': 0.5784735723247301, 'lambda_l1': 0.41100472775926544, 'lambda_l2': 2.55666181440713, 'min_data_in_leaf': 177, 'max_depth': 9, 'max_bin': 946, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:07:32,266] Trial 65 finished with value: 62.79148730018921 and parameters: {'learning_rate': 0.0755078020932346, 'colsample_bytree': 0.7139571438850577, 'colsample_bynode': 0.5625233521837679, 'lambda_l1': 0.6817089737760329, 'lambda_l2': 2.1832121326867746, 'min_data_in_leaf': 185, 'max_depth': 9, 'max_bin': 906, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:09:55,173] Trial 66 finished with value: 359.37192866032177 and parameters: {'learning_rate': 0.07493073770168424, 'colsample_bytree': 0.7225991986098718, 'colsample_bynode': 0.5499907149300807, 'lambda_l1': 0.8047316227469166, 'lambda_l2': 1.9500613505763777, 'min_data_in_leaf': 163, 'max_depth': 9, 'max_bin': 981, 'objective': 'huber'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:11:25,130] Trial 67 finished with value: 64.16750190554184 and parameters: {'learning_rate': 0.0789133312536156, 'colsample_bytree': 0.6545813025139188, 'colsample_bynode': 0.5618528561449637, 'lambda_l1': 0.1238299831245796, 'lambda_l2': 1.366248219555803, 'min_data_in_leaf': 190, 'max_depth': 9, 'max_bin': 828, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:12:58,672] Trial 68 finished with value: 63.88746167892413 and parameters: {'learning_rate': 0.07292555543158814, 'colsample_bytree': 0.7062023352556686, 'colsample_bynode': 0.5479903725712767, 'lambda_l1': 1.323954938071743, 'lambda_l2': 1.488524654210397, 'min_data_in_leaf': 201, 'max_depth': 9, 'max_bin': 905, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:14:50,651] Trial 69 finished with value: 675.3365927851456 and parameters: {'learning_rate': 0.08661803480698713, 'colsample_bytree': 0.7289105058295682, 'colsample_bynode': 0.629168992706949, 'lambda_l1': 0.7027077629963537, 'lambda_l2': 2.133454762556668, 'min_data_in_leaf': 185, 'max_depth': 8, 'max_bin': 743, 'objective': 'fair'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:17:21,742] Trial 70 finished with value: 72.82249581477213 and parameters: {'learning_rate': 0.07559449857964286, 'colsample_bytree': 0.695341516483266, 'colsample_bynode': 0.5188242597960324, 'lambda_l1': 0.36035949459531436, 'lambda_l2': 2.5503361072236803, 'min_data_in_leaf': 167, 'max_depth': 9, 'max_bin': 955, 'objective': 'regression_l1'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:18:54,825] Trial 71 finished with value: 63.23312801046306 and parameters: {'learning_rate': 0.07984356977256639, 'colsample_bytree': 0.701855906160987, 'colsample_bynode': 0.5780081973459794, 'lambda_l1': 1.0159293246264538, 'lambda_l2': 2.3707423452342815, 'min_data_in_leaf': 174, 'max_depth': 9, 'max_bin': 930, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:20:28,685] Trial 72 finished with value: 63.93317239076335 and parameters: {'learning_rate': 0.08144084437282055, 'colsample_bytree': 0.7183962906075129, 'colsample_bynode': 0.5648590130226571, 'lambda_l1': 0.9819997013094559, 'lambda_l2': 2.045839533454735, 'min_data_in_leaf': 154, 'max_depth': 9, 'max_bin': 879, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:22:00,941] Trial 73 finished with value: 64.59423183918982 and parameters: {'learning_rate': 0.07803002882885224, 'colsample_bytree': 0.6793429509386956, 'colsample_bynode': 0.5751282185441736, 'lambda_l1': 1.451350010343533, 'lambda_l2': 2.5203766918075874, 'min_data_in_leaf': 175, 'max_depth': 9, 'max_bin': 914, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:23:37,413] Trial 74 finished with value: 62.775753124586856 and parameters: {'learning_rate': 0.07032018277568776, 'colsample_bytree': 0.6675698298775996, 'colsample_bynode': 0.585514763596338, 'lambda_l1': 0.574056179207332, 'lambda_l2': 1.4459733783354998, 'min_data_in_leaf': 157, 'max_depth': 8, 'max_bin': 989, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:25:13,370] Trial 75 finished with value: 63.59773939719624 and parameters: {'learning_rate': 0.06967008248151312, 'colsample_bytree': 0.6644163057356616, 'colsample_bynode': 0.5510658100394441, 'lambda_l1': 0.6113714950662702, 'lambda_l2': 1.1091978280873893, 'min_data_in_leaf': 204, 'max_depth': 8, 'max_bin': 992, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:26:49,089] Trial 76 finished with value: 64.02931333364417 and parameters: {'learning_rate': 0.07634978354843572, 'colsample_bytree': 0.6437014248607419, 'colsample_bynode': 0.5880017025597388, 'lambda_l1': 0.250807382184689, 'lambda_l2': 0.545344317522583, 'min_data_in_leaf': 163, 'max_depth': 9, 'max_bin': 984, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:28:18,732] Trial 77 finished with value: 64.59531513046481 and parameters: {'learning_rate': 0.08206373044679245, 'colsample_bytree': 0.6773889987289902, 'colsample_bynode': 0.6272853946328861, 'lambda_l1': 0.018893083430796542, 'lambda_l2': 1.4552762529872711, 'min_data_in_leaf': 149, 'max_depth': 8, 'max_bin': 796, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:29:51,168] Trial 78 finished with value: 64.26635811024927 and parameters: {'learning_rate': 0.08676017837863294, 'colsample_bytree': 0.7491665902678234, 'colsample_bynode': 0.5456795725873986, 'lambda_l1': 0.6606050228828279, 'lambda_l2': 1.4757429592693219, 'min_data_in_leaf': 184, 'max_depth': 8, 'max_bin': 858, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:31:24,256] Trial 79 finished with value: 63.009473485574055 and parameters: {'learning_rate': 0.08350871276386847, 'colsample_bytree': 0.6916801218906917, 'colsample_bynode': 0.6158212599995496, 'lambda_l1': 1.4918104880667218, 'lambda_l2': 1.9200879984507895, 'min_data_in_leaf': 126, 'max_depth': 10, 'max_bin': 953, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:33:44,328] Trial 80 finished with value: 352.66824038066807 and parameters: {'learning_rate': 0.0842446376903542, 'colsample_bytree': 0.6899387320436521, 'colsample_bynode': 0.6126616225983019, 'lambda_l1': 1.9561523265808338, 'lambda_l2': 1.004458222754603, 'min_data_in_leaf': 137, 'max_depth': 10, 'max_bin': 898, 'objective': 'huber'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:35:19,514] Trial 81 finished with value: 63.341691673688125 and parameters: {'learning_rate': 0.07274138446815713, 'colsample_bytree': 0.7204228400060463, 'colsample_bynode': 0.5958933172937354, 'lambda_l1': 1.429305185344559, 'lambda_l2': 1.803176244558351, 'min_data_in_leaf': 122, 'max_depth': 10, 'max_bin': 961, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:36:54,454] Trial 82 finished with value: 63.2730419279811 and parameters: {'learning_rate': 0.0827237170880476, 'colsample_bytree': 0.7331948350447401, 'colsample_bynode': 0.5626782879656779, 'lambda_l1': 1.2467212638412721, 'lambda_l2': 2.0377067297161355, 'min_data_in_leaf': 157, 'max_depth': 10, 'max_bin': 994, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:38:26,425] Trial 83 finished with value: 63.41001706807907 and parameters: {'learning_rate': 0.08600096369033419, 'colsample_bytree': 0.7012586799381573, 'colsample_bynode': 0.5360811502116121, 'lambda_l1': 0.8594445130147346, 'lambda_l2': 1.30017808853996, 'min_data_in_leaf': 128, 'max_depth': 10, 'max_bin': 918, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:39:57,405] Trial 84 finished with value: 63.36949342341671 and parameters: {'learning_rate': 0.07831065131530718, 'colsample_bytree': 0.6491886537294157, 'colsample_bynode': 0.5684602405662275, 'lambda_l1': 1.4857880945229063, 'lambda_l2': 3.0921664737429992, 'min_data_in_leaf': 169, 'max_depth': 9, 'max_bin': 825, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:41:31,694] Trial 85 finished with value: 64.34773291527004 and parameters: {'learning_rate': 0.08957835691786405, 'colsample_bytree': 0.6694956410097197, 'colsample_bynode': 0.5839140724446183, 'lambda_l1': 0.26344949537270784, 'lambda_l2': 0.7367835324008578, 'min_data_in_leaf': 147, 'max_depth': 10, 'max_bin': 959, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:43:03,957] Trial 86 finished with value: 63.32349040070592 and parameters: {'learning_rate': 0.07470110608331543, 'colsample_bytree': 0.713652720398745, 'colsample_bynode': 0.6122952071229429, 'lambda_l1': 0.5424222642211679, 'lambda_l2': 1.651311444204795, 'min_data_in_leaf': 108, 'max_depth': 10, 'max_bin': 878, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:44:37,881] Trial 87 finished with value: 63.627344423867854 and parameters: {'learning_rate': 0.0797400707812084, 'colsample_bytree': 0.6895386522380111, 'colsample_bynode': 0.5168367818373887, 'lambda_l1': 1.1855203891398447, 'lambda_l2': 2.2564217433029823, 'min_data_in_leaf': 139, 'max_depth': 9, 'max_bin': 998, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:47:02,566] Trial 88 finished with value: 73.62057670768517 and parameters: {'learning_rate': 0.0829447220224645, 'colsample_bytree': 0.7363917188674689, 'colsample_bynode': 0.5565180915867457, 'lambda_l1': 1.902721832441445, 'lambda_l2': 3.4600564622125787, 'min_data_in_leaf': 187, 'max_depth': 10, 'max_bin': 946, 'objective': 'regression_l1'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:48:36,222] Trial 89 finished with value: 64.36022994365831 and parameters: {'learning_rate': 0.07667601354150169, 'colsample_bytree': 0.7482697110076176, 'colsample_bynode': 0.5943607745447926, 'lambda_l1': 1.5398927742806583, 'lambda_l2': 1.278020195431496, 'min_data_in_leaf': 196, 'max_depth': 8, 'max_bin': 910, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:50:09,430] Trial 90 finished with value: 62.699737656351395 and parameters: {'learning_rate': 0.07080204187983008, 'colsample_bytree': 0.6749801170798487, 'colsample_bynode': 0.5400722250452618, 'lambda_l1': 2.1575601096962553, 'lambda_l2': 2.693018864511733, 'min_data_in_leaf': 160, 'max_depth': 9, 'max_bin': 847, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:51:41,180] Trial 91 finished with value: 63.78776903583616 and parameters: {'learning_rate': 0.06782169875629462, 'colsample_bytree': 0.6678883875519281, 'colsample_bynode': 0.530976484552227, 'lambda_l1': 2.151442221526283, 'lambda_l2': 2.6812341116597347, 'min_data_in_leaf': 160, 'max_depth': 9, 'max_bin': 839, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:53:18,219] Trial 92 finished with value: 63.53534911385256 and parameters: {'learning_rate': 0.07120997785697666, 'colsample_bytree': 0.7003670157351496, 'colsample_bynode': 0.5447729260754399, 'lambda_l1': 1.6329979232470782, 'lambda_l2': 3.030718834153986, 'min_data_in_leaf': 181, 'max_depth': 9, 'max_bin': 874, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:54:54,599] Trial 93 finished with value: 63.61480595428734 and parameters: {'learning_rate': 0.07309425161569867, 'colsample_bytree': 0.6800909184394296, 'colsample_bynode': 0.572022696258895, 'lambda_l1': 0.827955510671578, 'lambda_l2': 2.286088871111954, 'min_data_in_leaf': 167, 'max_depth': 10, 'max_bin': 969, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:56:29,528] Trial 94 finished with value: 64.4935888787379 and parameters: {'learning_rate': 0.063051832796891, 'colsample_bytree': 0.7125820308270756, 'colsample_bynode': 0.5122683668687619, 'lambda_l1': 1.151012038567258, 'lambda_l2': 1.9610907996112392, 'min_data_in_leaf': 152, 'max_depth': 9, 'max_bin': 928, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:58:20,399] Trial 95 finished with value: 596.1057098346478 and parameters: {'learning_rate': 0.08782719363637574, 'colsample_bytree': 0.6565872465332724, 'colsample_bynode': 0.5300068415644668, 'lambda_l1': 1.923516931808626, 'lambda_l2': 2.7019857155381786, 'min_data_in_leaf': 125, 'max_depth': 8, 'max_bin': 855, 'objective': 'fair'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-19 23:59:49,939] Trial 96 finished with value: 63.134740128111936 and parameters: {'learning_rate': 0.0856007795850326, 'colsample_bytree': 0.6366580421596469, 'colsample_bynode': 0.5531941002018655, 'lambda_l1': 0.568641712153015, 'lambda_l2': 1.8191394650901094, 'min_data_in_leaf': 146, 'max_depth': 10, 'max_bin': 807, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-20 00:01:17,474] Trial 97 finished with value: 64.21538777898934 and parameters: {'learning_rate': 0.0853685013580283, 'colsample_bytree': 0.6391508696343667, 'colsample_bynode': 0.5905254809696934, 'lambda_l1': 0.5160670029553193, 'lambda_l2': 1.591842310457785, 'min_data_in_leaf': 120, 'max_depth': 10, 'max_bin': 722, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-20 00:02:45,979] Trial 98 finished with value: 63.86778712988791 and parameters: {'learning_rate': 0.09295939953936543, 'colsample_bytree': 0.6230521144043046, 'colsample_bynode': 0.564561753717018, 'lambda_l1': 2.2373800359654745, 'lambda_l2': 1.8660172017465244, 'min_data_in_leaf': 157, 'max_depth': 10, 'max_bin': 775, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.




[I 2023-12-20 00:04:16,206] Trial 99 finished with value: 64.42605119245918 and parameters: {'learning_rate': 0.08238920165905475, 'colsample_bytree': 0.6477828230638696, 'colsample_bynode': 0.6024477779192345, 'lambda_l1': 2.7463984078024937, 'lambda_l2': 1.1327059602814233, 'min_data_in_leaf': 144, 'max_depth': 10, 'max_bin': 792, 'objective': 'regression'}. Best is trial 63 with value: 62.61718036113329.
Best Params: {'learning_rate': 0.08072727696049717, 'colsample_bytree': 0.7107613501491252, 'colsample_bynode': 0.579127562270319, 'lambda_l1': 0.3688343201590739, 'lambda_l2': 2.5405843112295075, 'min_data_in_leaf': 179, 'max_depth': 9, 'max_bin': 944, 'objective': 'regression'}


In [13]:
best_params = {
    'n_iter'           : 900,
    'verbose'          : -1,
    'objective'        : 'regression',
    'learning_rate'    : 0.08072727696049717,
    'colsample_bytree' : 0.7107613501491252,
    'colsample_bynode' : 0.579127562270319,
    'lambda_l1'        : 0.3688343201590739,
    'lambda_l2'        : 2.5405843112295075,
    'min_data_in_leaf' : 179,
    'max_depth'        : 9,
    'max_bin'          : 944,
} # val score is 62.24 for the last month

### Validation

In [14]:
result = cross_validate(
    estimator=lgb.LGBMRegressor(**best_params, random_state=42),
    X=df_train.drop(columns=["target"]), 
    y=df_train["target"],
    scoring="neg_mean_absolute_error",
    cv=MonthlyKFold(1),
)

print(f"Fit Time(s): {result['fit_time'].mean():.3f}")
print(f"Score Time(s): {result['score_time'].mean():.3f}")
print(f"Error(MAE): {-result['test_score'].mean():.3f}")



KeyboardInterrupt: 

### Training

In [14]:
if load_path is not None:
    model = pickle.load(open(load_path, "rb"))
else:
    model = VotingRegressor([
        ('lgb_1', lgb.LGBMRegressor(**best_params, random_state=100)), 
        ('lgb_2', lgb.LGBMRegressor(**best_params, random_state=101)), 
        ('lgb_3', lgb.LGBMRegressor(**best_params, random_state=102)), 
        ('lgb_4', lgb.LGBMRegressor(**best_params, random_state=103)), 
        ('lgb_5', lgb.LGBMRegressor(**best_params, random_state=104)), 
    ])
    
    model.fit(
        X=df_train.drop(columns=["target"]),
        y=df_train["target"]
    )

if save_path is not None:
    with open(save_path, "wb") as f:
        pickle.dump(model, f)



### Prediction

In [21]:
import public_timeseries_testing_util as enefit

env = enefit.make_env()
iter_test = env.iter_test()

In [22]:
for (test, revealed_targets, client, historical_weather,
        forecast_weather, electricity_prices, gas_prices, sample_prediction) in iter_test:
    
    test = test.rename(columns={"prediction_datetime": "datetime"})
    test['datetime'] = pd.to_datetime(test['datetime'])
    electricity_prices['forecast_date'] = pd.to_datetime(electricity_prices['forecast_date'])
    electricity_prices['origin_date'] = pd.to_datetime(electricity_prices['origin_date'])
    forecast_weather['origin_datetime'] = pd.to_datetime(forecast_weather['origin_datetime'])
    forecast_weather['forecast_datetime'] = pd.to_datetime(forecast_weather['forecast_datetime'])
    historical_weather['datetime'] = pd.to_datetime(historical_weather['datetime'])
    revealed_targets['datetime'] = pd.to_datetime(revealed_targets['datetime'])
    
    df_test           = pl.from_pandas(test[data_cols[1:]], schema_overrides=schema_data)
    df_client         = pl.from_pandas(client[client_cols], schema_overrides=schema_client)
    df_gas            = pl.from_pandas(gas_prices[gas_cols], schema_overrides=schema_gas)
    df_electricity    = pl.from_pandas(electricity_prices[electricity_cols], schema_overrides=schema_electricity)
    df_new_forecast   = pl.from_pandas(forecast_weather[forecast_cols], schema_overrides=schema_forecast)
    df_new_historical = pl.from_pandas(historical_weather[historical_cols], schema_overrides=schema_historical)
    df_new_target     = pl.from_pandas(revealed_targets[target_cols], schema_overrides=schema_target)
    
    df_forecast       = pl.concat([df_forecast, df_new_forecast]).unique()
    df_historical     = pl.concat([df_historical, df_new_historical]).unique()
    df_target         = pl.concat([df_target, df_new_target]).unique()
    
    X_test = feature_eng(df_test, df_client, df_gas, df_electricity, df_forecast, df_historical, df_location, df_target)
    X_test = to_pandas(X_test)
    
    sample_prediction["target"] = model.predict(X_test).clip(0)
    
    env.predict(sample_prediction)