In [29]:
import pandas as pd
import numpy as np

In [45]:
import optuna.integration.lightgbm as gbm
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import optuna as opt


In [166]:
class Data:
    def __init__(self, path: str) -> None:    
        demand_path = path + "/Demand_history.csv"
        existingEV_path = path + "/existing_EV_infrastructure_2018.csv"
        self.df_orig = pd.read_csv(demand_path)
        self.df_trans = self.df_orig
        self.years_window = 2
        self.y_cols = [f"n-{y}" for y in range(1, self.years_window + 1)]
        
    def clean(self):
        self.df_orig.loc[(self.df_orig != 0).any(1)]
        print(self.df_orig)
        
    def split(self):
        
        x_train = pd.DataFrame(columns=["x_coordinate", "y_coordinate", *self.y_cols])
        x_test = pd.DataFrame(columns=["x_coordinate", "y_coordinate", *self.y_cols])
        y_train = pd.Series(dtype=np.float64)
        y_test = pd.Series(dtype=np.float64)
        for y in self.df_trans.columns[self.df_trans.columns.str.startswith('20')]:
            y = int(y)
            if y < 2010 + self.years_window:
                continue
            y_cols = [f"{y - i}" for i in range(1, self.years_window + 1)]
            stack_x = self.df_trans.loc[:, self.df_trans.columns.isin(["x_coordinate", "y_coordinate", *y_cols])]
            y_dict = {f"{y - i}": f"n-{i}" for i in range(1, self.years_window + 1)}
            stack_x = stack_x.rename(columns=y_dict)
            stack_y = self.df_trans.loc[:, f"{y}"]
            if y == 2018:
                 x_test = pd.concat([x_test, stack_x], axis=0, ignore_index=True)
                 y_test = pd.concat([y_test, stack_y], axis=0, ignore_index=True)
            else:
                x_train = pd.concat([x_train, stack_x], axis=0, ignore_index=True)
                y_train = pd.concat([y_train, stack_y], axis=0, ignore_index=True)
        
        data.x_train = x_train
        data.x_test = x_test
        data.y_train = y_train
        data.y_test = y_test
        
    def addYearDemandfromForecast(self, year: int, predicted: np.array) -> None:
        predicted = predicted.reshape(-1, 1)
        self.df_trans[f"{year}"] = predicted        
                  
path = "data"
data = Data(path)
data.split()

In [167]:
class Model:
    def __init__(self, data: Data) -> None:
        self.data = data
    
    def fit(self) -> None:
        params = {
            "n_estimators": 10000,
            "objective": "regression",
            "metric": "rmse",
            "verbosity": -1,
            "boosting_type": "gbdt",
            "categorical_feature": 0
         }
        dtrain = gbm.Dataset(data.x_train, data.y_train)
        print(dtrain)
        deval = gbm.Dataset(data.x_test, data.y_test, reference=dtrain)
        self.model = gbm.train(params, train_set=dtrain, valid_sets=deval,
            early_stopping_rounds=100)
        
        pred = self.model.predict(data.x_train)
        train_rmse = np.sqrt(mean_squared_error(data.y_train, pred))
        train_mae = mean_absolute_error(data.y_train, pred)   
        train_r2 = r2_score(data.y_train, pred) 
        pred = self.model.predict(data.x_test)
        test_rmse = np.sqrt(mean_squared_error(data.y_test, pred))
        test_mae = mean_absolute_error(data.y_test, pred)   
        test_r2 = r2_score(data.y_test, pred)    
        
        print(f"training: rmse={train_rmse}, mae={train_mae}, r2={train_r2}")   
        print(f"test: rmse={test_rmse}, mae={test_mae}, r2={test_r2}")                        
    
    def predict(self, year: int) -> pd.Series:
        y_cols = [f"{year - i}" for i in range(1, data.years_window + 1)]
        x_forecast = \
            data.df_trans.loc[:,
                              data.df_trans.columns.isin(["x_coordinate", "y_coordinate", *y_cols])]        
        return self.model.predict(x_forecast)
    
    def set_params(self, params):
        params['n_estimators'] = int(params['n_estimators'])
        params['max_depth'] = int(params['max_depth'])
        self.model.set_params(**params)
        

In [168]:
model = Model(data)

In [169]:
model.fit()

[32m[I 2022-09-14 16:37:25,884][0m A new study created in memory with name: no-name-dc322646-72f4-4bfd-9812-2afd5769346e[0m


<lightgbm.basic.Dataset object at 0x37a78aa10>


feature_fraction, val_score: inf:   0%|          | 0/7 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds


Please use categorical_feature argument of the Dataset constructor to pass this parameter.
feature_fraction, val_score: 16.408345:  14%|#4        | 1/7 [00:02<00:12,  2.06s/it][32m[I 2022-09-14 16:37:27,955][0m Trial 0 finished with value: 16.40834461303095 and parameters: {'feature_fraction': 1.0}. Best is trial 0 with value: 16.40834461303095.[0m
feature_fraction, val_score: 16.408345:  14%|#4        | 1/7 [00:02<00:12,  2.06s/it]

Early stopping, best iteration is:
[19]	valid_0's rmse: 16.4083
Training until validation scores don't improve for 100 rounds


feature_fraction, val_score: 15.010743:  29%|##8       | 2/7 [00:04<00:10,  2.17s/it][32m[I 2022-09-14 16:37:30,204][0m Trial 1 finished with value: 15.010742984074376 and parameters: {'feature_fraction': 0.4}. Best is trial 1 with value: 15.010742984074376.[0m
feature_fraction, val_score: 15.010743:  29%|##8       | 2/7 [00:04<00:10,  2.17s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.0107
Training until validation scores don't improve for 100 rounds


feature_fraction, val_score: 15.010743:  43%|####2     | 3/7 [00:06<00:08,  2.13s/it][32m[I 2022-09-14 16:37:32,286][0m Trial 2 finished with value: 16.01780806571396 and parameters: {'feature_fraction': 0.8}. Best is trial 1 with value: 15.010742984074376.[0m
feature_fraction, val_score: 15.010743:  43%|####2     | 3/7 [00:06<00:08,  2.13s/it]

Early stopping, best iteration is:
[19]	valid_0's rmse: 16.0178
Training until validation scores don't improve for 100 rounds


feature_fraction, val_score: 15.010743:  57%|#####7    | 4/7 [00:08<00:06,  2.11s/it][32m[I 2022-09-14 16:37:34,351][0m Trial 3 finished with value: 16.40834461303095 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 1 with value: 15.010742984074376.[0m
feature_fraction, val_score: 15.010743:  57%|#####7    | 4/7 [00:08<00:06,  2.11s/it]

Early stopping, best iteration is:
[19]	valid_0's rmse: 16.4083
Training until validation scores don't improve for 100 rounds


feature_fraction, val_score: 15.010743:  71%|#######1  | 5/7 [00:10<00:04,  2.14s/it][32m[I 2022-09-14 16:37:36,545][0m Trial 4 finished with value: 15.010742984074376 and parameters: {'feature_fraction': 0.5}. Best is trial 1 with value: 15.010742984074376.[0m
feature_fraction, val_score: 15.010743:  71%|#######1  | 5/7 [00:10<00:04,  2.14s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.0107
Training until validation scores don't improve for 100 rounds


feature_fraction, val_score: 15.010743:  86%|########5 | 6/7 [00:12<00:02,  2.19s/it][32m[I 2022-09-14 16:37:38,827][0m Trial 5 finished with value: 15.010742984074376 and parameters: {'feature_fraction': 0.6}. Best is trial 1 with value: 15.010742984074376.[0m
feature_fraction, val_score: 15.010743:  86%|########5 | 6/7 [00:12<00:02,  2.19s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.0107
Training until validation scores don't improve for 100 rounds


feature_fraction, val_score: 15.010743: 100%|##########| 7/7 [00:14<00:00,  2.13s/it][32m[I 2022-09-14 16:37:40,852][0m Trial 6 finished with value: 16.01780806571396 and parameters: {'feature_fraction': 0.7}. Best is trial 1 with value: 15.010742984074376.[0m
feature_fraction, val_score: 15.010743: 100%|##########| 7/7 [00:14<00:00,  2.14s/it]


Early stopping, best iteration is:
[19]	valid_0's rmse: 16.0178


num_leaves, val_score: 15.010743:   0%|          | 0/20 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:   5%|5         | 1/20 [00:11<03:37, 11.43s/it][32m[I 2022-09-14 16:37:52,283][0m Trial 7 finished with value: 15.134080186416341 and parameters: {'num_leaves': 193}. Best is trial 7 with value: 15.134080186416341.[0m
num_leaves, val_score: 15.010743:   5%|5         | 1/20 [00:11<03:37, 11.43s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.1341
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  10%|#         | 2/20 [00:17<02:30,  8.36s/it][32m[I 2022-09-14 16:37:58,498][0m Trial 8 finished with value: 15.19637562280185 and parameters: {'num_leaves': 98}. Best is trial 7 with value: 15.134080186416341.[0m
num_leaves, val_score: 15.010743:  10%|#         | 2/20 [00:17<02:30,  8.36s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.1964
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  15%|#5        | 3/20 [00:23<02:01,  7.12s/it][32m[I 2022-09-14 16:38:04,149][0m Trial 9 finished with value: 15.182196527516005 and parameters: {'num_leaves': 95}. Best is trial 7 with value: 15.134080186416341.[0m
num_leaves, val_score: 15.010743:  15%|#5        | 3/20 [00:23<02:01,  7.12s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.1822
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  20%|##        | 4/20 [00:28<01:39,  6.24s/it][32m[I 2022-09-14 16:38:09,034][0m Trial 10 finished with value: 15.073884074870776 and parameters: {'num_leaves': 66}. Best is trial 10 with value: 15.073884074870776.[0m
num_leaves, val_score: 15.010743:  20%|##        | 4/20 [00:28<01:39,  6.24s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.0739
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  25%|##5       | 5/20 [00:29<01:06,  4.43s/it][32m[I 2022-09-14 16:38:10,246][0m Trial 11 finished with value: 15.315692862943239 and parameters: {'num_leaves': 16}. Best is trial 10 with value: 15.073884074870776.[0m
num_leaves, val_score: 15.010743:  25%|##5       | 5/20 [00:29<01:06,  4.43s/it]

Early stopping, best iteration is:
[27]	valid_0's rmse: 15.3157
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  30%|###       | 6/20 [00:30<00:46,  3.33s/it][32m[I 2022-09-14 16:38:11,447][0m Trial 12 finished with value: 15.384496153299633 and parameters: {'num_leaves': 14}. Best is trial 10 with value: 15.073884074870776.[0m
num_leaves, val_score: 15.010743:  30%|###       | 6/20 [00:30<00:46,  3.33s/it]

Early stopping, best iteration is:
[28]	valid_0's rmse: 15.3845
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  35%|###5      | 7/20 [00:36<00:52,  4.01s/it][32m[I 2022-09-14 16:38:16,864][0m Trial 13 finished with value: 15.182196527516005 and parameters: {'num_leaves': 95}. Best is trial 10 with value: 15.073884074870776.[0m
num_leaves, val_score: 15.010743:  35%|###5      | 7/20 [00:36<00:52,  4.01s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.1822
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  40%|####      | 8/20 [00:43<01:01,  5.14s/it][32m[I 2022-09-14 16:38:24,414][0m Trial 14 finished with value: 15.12781908670164 and parameters: {'num_leaves': 123}. Best is trial 10 with value: 15.073884074870776.[0m
num_leaves, val_score: 15.010743:  40%|####      | 8/20 [00:43<01:01,  5.14s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.1278
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  45%|####5     | 9/20 [00:51<01:05,  5.97s/it][32m[I 2022-09-14 16:38:32,224][0m Trial 15 finished with value: 15.19637562280185 and parameters: {'num_leaves': 98}. Best is trial 10 with value: 15.073884074870776.[0m
num_leaves, val_score: 15.010743:  45%|####5     | 9/20 [00:51<01:05,  5.97s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.1964
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  50%|#####     | 10/20 [00:57<01:01,  6.14s/it][32m[I 2022-09-14 16:38:38,737][0m Trial 16 finished with value: 15.188880358234762 and parameters: {'num_leaves': 117}. Best is trial 10 with value: 15.073884074870776.[0m
num_leaves, val_score: 15.010743:  50%|#####     | 10/20 [00:57<01:01,  6.14s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.1889
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  55%|#####5    | 11/20 [01:10<01:13,  8.20s/it][32m[I 2022-09-14 16:38:51,597][0m Trial 17 finished with value: 15.135934766345429 and parameters: {'num_leaves': 241}. Best is trial 10 with value: 15.073884074870776.[0m
num_leaves, val_score: 15.010743:  55%|#####5    | 11/20 [01:10<01:13,  8.20s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.1359
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  60%|######    | 12/20 [01:19<01:07,  8.39s/it][32m[I 2022-09-14 16:39:00,430][0m Trial 18 finished with value: 15.158186544285483 and parameters: {'num_leaves': 165}. Best is trial 10 with value: 15.073884074870776.[0m
num_leaves, val_score: 15.010743:  60%|######    | 12/20 [01:19<01:07,  8.39s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.1582
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  65%|######5   | 13/20 [01:22<00:47,  6.86s/it][32m[I 2022-09-14 16:39:03,757][0m Trial 19 finished with value: 15.060095743587508 and parameters: {'num_leaves': 53}. Best is trial 19 with value: 15.060095743587508.[0m
num_leaves, val_score: 15.010743:  65%|######5   | 13/20 [01:22<00:47,  6.86s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.0601
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  70%|#######   | 14/20 [01:26<00:35,  5.93s/it][32m[I 2022-09-14 16:39:07,542][0m Trial 20 finished with value: 15.090323018486595 and parameters: {'num_leaves': 57}. Best is trial 19 with value: 15.060095743587508.[0m
num_leaves, val_score: 15.010743:  70%|#######   | 14/20 [01:26<00:35,  5.93s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.0903
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  75%|#######5  | 15/20 [01:30<00:26,  5.36s/it][32m[I 2022-09-14 16:39:11,577][0m Trial 21 finished with value: 15.106691536436657 and parameters: {'num_leaves': 51}. Best is trial 19 with value: 15.060095743587508.[0m
num_leaves, val_score: 15.010743:  75%|#######5  | 15/20 [01:30<00:26,  5.36s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.1067
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  80%|########  | 16/20 [01:34<00:20,  5.02s/it][32m[I 2022-09-14 16:39:15,806][0m Trial 22 finished with value: 15.11809517298249 and parameters: {'num_leaves': 49}. Best is trial 19 with value: 15.060095743587508.[0m
num_leaves, val_score: 15.010743:  80%|########  | 16/20 [01:34<00:20,  5.02s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.1181
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  85%|########5 | 17/20 [01:39<00:14,  4.78s/it][32m[I 2022-09-14 16:39:20,028][0m Trial 23 finished with value: 15.049699809031749 and parameters: {'num_leaves': 60}. Best is trial 23 with value: 15.049699809031749.[0m
num_leaves, val_score: 15.010743:  85%|########5 | 17/20 [01:39<00:14,  4.78s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.0497
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  90%|######### | 18/20 [01:48<00:12,  6.10s/it][32m[I 2022-09-14 16:39:29,203][0m Trial 24 finished with value: 15.17546928653827 and parameters: {'num_leaves': 150}. Best is trial 23 with value: 15.049699809031749.[0m
num_leaves, val_score: 15.010743:  90%|######### | 18/20 [01:48<00:12,  6.10s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.1755
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743:  95%|#########5| 19/20 [01:50<00:04,  5.00s/it][32m[I 2022-09-14 16:39:31,633][0m Trial 25 finished with value: 15.010742984074376 and parameters: {'num_leaves': 31}. Best is trial 25 with value: 15.010742984074376.[0m
num_leaves, val_score: 15.010743:  95%|#########5| 19/20 [01:50<00:04,  5.00s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.0107
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 15.010743: 100%|##########| 20/20 [01:51<00:00,  3.72s/it][32m[I 2022-09-14 16:39:32,366][0m Trial 26 finished with value: 15.871714024839095 and parameters: {'num_leaves': 6}. Best is trial 25 with value: 15.010742984074376.[0m
num_leaves, val_score: 15.010743: 100%|##########| 20/20 [01:51<00:00,  5.58s/it]


Early stopping, best iteration is:
[29]	valid_0's rmse: 15.8717


bagging, val_score: 15.010743:   0%|          | 0/10 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds


bagging, val_score: 14.827229:  10%|#         | 1/10 [00:02<00:21,  2.42s/it][32m[I 2022-09-14 16:39:34,785][0m Trial 27 finished with value: 14.827228516609862 and parameters: {'bagging_fraction': 0.5269801809894871, 'bagging_freq': 1}. Best is trial 27 with value: 14.827228516609862.[0m
bagging, val_score: 14.827229:  10%|#         | 1/10 [00:02<00:21,  2.42s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


bagging, val_score: 14.827229:  20%|##        | 2/10 [00:04<00:19,  2.42s/it][32m[I 2022-09-14 16:39:37,205][0m Trial 28 finished with value: 14.929356970390408 and parameters: {'bagging_fraction': 0.6740106774668155, 'bagging_freq': 3}. Best is trial 27 with value: 14.827228516609862.[0m
bagging, val_score: 14.827229:  20%|##        | 2/10 [00:04<00:19,  2.42s/it]

Early stopping, best iteration is:
[27]	valid_0's rmse: 14.9294
Training until validation scores don't improve for 100 rounds


bagging, val_score: 14.827229:  30%|###       | 3/10 [00:06<00:15,  2.22s/it][32m[I 2022-09-14 16:39:39,189][0m Trial 29 finished with value: 15.081849059446629 and parameters: {'bagging_fraction': 0.8074832457814922, 'bagging_freq': 2}. Best is trial 27 with value: 14.827228516609862.[0m
bagging, val_score: 14.827229:  30%|###       | 3/10 [00:06<00:15,  2.22s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.0818
Training until validation scores don't improve for 100 rounds


bagging, val_score: 14.827229:  40%|####      | 4/10 [00:09<00:13,  2.25s/it][32m[I 2022-09-14 16:39:41,483][0m Trial 30 finished with value: 15.0810706097762 and parameters: {'bagging_fraction': 0.8787131756845254, 'bagging_freq': 5}. Best is trial 27 with value: 14.827228516609862.[0m
bagging, val_score: 14.827229:  40%|####      | 4/10 [00:09<00:13,  2.25s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.0811
Training until validation scores don't improve for 100 rounds


bagging, val_score: 14.827229:  50%|#####     | 5/10 [00:11<00:11,  2.31s/it][32m[I 2022-09-14 16:39:43,895][0m Trial 31 finished with value: 15.038841329558252 and parameters: {'bagging_fraction': 0.8706065292753176, 'bagging_freq': 6}. Best is trial 27 with value: 14.827228516609862.[0m
bagging, val_score: 14.827229:  50%|#####     | 5/10 [00:11<00:11,  2.31s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.0388
Training until validation scores don't improve for 100 rounds


bagging, val_score: 14.827229:  60%|######    | 6/10 [00:14<00:10,  2.57s/it][32m[I 2022-09-14 16:39:46,986][0m Trial 32 finished with value: 15.22060617784251 and parameters: {'bagging_fraction': 0.79935978862049, 'bagging_freq': 5}. Best is trial 27 with value: 14.827228516609862.[0m
bagging, val_score: 14.827229:  60%|######    | 6/10 [00:14<00:10,  2.57s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 15.2206
Training until validation scores don't improve for 100 rounds


bagging, val_score: 14.827229:  70%|#######   | 7/10 [00:16<00:07,  2.51s/it][32m[I 2022-09-14 16:39:49,354][0m Trial 33 finished with value: 15.108288609138393 and parameters: {'bagging_fraction': 0.48782851500384566, 'bagging_freq': 3}. Best is trial 27 with value: 14.827228516609862.[0m
bagging, val_score: 14.827229:  70%|#######   | 7/10 [00:16<00:07,  2.51s/it]

Early stopping, best iteration is:
[27]	valid_0's rmse: 15.1083
Training until validation scores don't improve for 100 rounds


bagging, val_score: 14.827229:  80%|########  | 8/10 [00:19<00:05,  2.51s/it][32m[I 2022-09-14 16:39:51,866][0m Trial 34 finished with value: 15.152857806106901 and parameters: {'bagging_fraction': 0.6336314813242785, 'bagging_freq': 6}. Best is trial 27 with value: 14.827228516609862.[0m
bagging, val_score: 14.827229:  80%|########  | 8/10 [00:19<00:05,  2.51s/it]

Early stopping, best iteration is:
[27]	valid_0's rmse: 15.1529
Training until validation scores don't improve for 100 rounds


bagging, val_score: 14.827229:  90%|######### | 9/10 [00:22<00:02,  2.69s/it][32m[I 2022-09-14 16:39:54,944][0m Trial 35 finished with value: 15.04914784228775 and parameters: {'bagging_fraction': 0.5325128039370064, 'bagging_freq': 6}. Best is trial 27 with value: 14.827228516609862.[0m
bagging, val_score: 14.827229:  90%|######### | 9/10 [00:22<00:02,  2.69s/it]

Early stopping, best iteration is:
[27]	valid_0's rmse: 15.0491
Training until validation scores don't improve for 100 rounds


bagging, val_score: 14.827229: 100%|##########| 10/10 [00:25<00:00,  2.61s/it][32m[I 2022-09-14 16:39:57,382][0m Trial 36 finished with value: 14.915847287213815 and parameters: {'bagging_fraction': 0.5356510896814295, 'bagging_freq': 6}. Best is trial 27 with value: 14.827228516609862.[0m
bagging, val_score: 14.827229: 100%|##########| 10/10 [00:25<00:00,  2.50s/it]


Early stopping, best iteration is:
[27]	valid_0's rmse: 14.9158


feature_fraction_stage2, val_score: 14.827229:   0%|          | 0/3 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds


feature_fraction_stage2, val_score: 14.827229:  33%|###3      | 1/3 [00:02<00:04,  2.11s/it][32m[I 2022-09-14 16:39:59,496][0m Trial 37 finished with value: 14.827228516609862 and parameters: {'feature_fraction': 0.44800000000000006}. Best is trial 37 with value: 14.827228516609862.[0m
feature_fraction_stage2, val_score: 14.827229:  33%|###3      | 1/3 [00:02<00:04,  2.11s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


feature_fraction_stage2, val_score: 14.827229:  67%|######6   | 2/3 [00:04<00:02,  2.24s/it][32m[I 2022-09-14 16:40:01,826][0m Trial 38 finished with value: 14.827228516609862 and parameters: {'feature_fraction': 0.48000000000000004}. Best is trial 37 with value: 14.827228516609862.[0m
feature_fraction_stage2, val_score: 14.827229:  67%|######6   | 2/3 [00:04<00:02,  2.24s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


feature_fraction_stage2, val_score: 14.827229: 100%|##########| 3/3 [00:08<00:00,  3.30s/it][32m[I 2022-09-14 16:40:06,381][0m Trial 39 finished with value: 14.827228516609862 and parameters: {'feature_fraction': 0.41600000000000004}. Best is trial 37 with value: 14.827228516609862.[0m
feature_fraction_stage2, val_score: 14.827229: 100%|##########| 3/3 [00:08<00:00,  3.00s/it]


Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272


regularization_factors, val_score: 14.827229:   0%|          | 0/20 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:   5%|5         | 1/20 [00:03<01:09,  3.67s/it][32m[I 2022-09-14 16:40:10,055][0m Trial 40 finished with value: 14.827230463391786 and parameters: {'lambda_l1': 0.00020371093680476602, 'lambda_l2': 0.00022855331068735303}. Best is trial 40 with value: 14.827230463391786.[0m
regularization_factors, val_score: 14.827229:   5%|5         | 1/20 [00:03<01:09,  3.67s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  10%|#         | 2/20 [00:10<01:38,  5.45s/it][32m[I 2022-09-14 16:40:16,742][0m Trial 41 finished with value: 14.959479501637816 and parameters: {'lambda_l1': 8.950491897152935, 'lambda_l2': 3.960051522326087e-07}. Best is trial 40 with value: 14.827230463391786.[0m
regularization_factors, val_score: 14.827229:  10%|#         | 2/20 [00:10<01:38,  5.45s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.9595
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  15%|#5        | 3/20 [00:14<01:23,  4.92s/it][32m[I 2022-09-14 16:40:21,027][0m Trial 42 finished with value: 14.847961132060576 and parameters: {'lambda_l1': 4.801604654563549e-06, 'lambda_l2': 0.010031365937883455}. Best is trial 40 with value: 14.827230463391786.[0m
regularization_factors, val_score: 14.827229:  15%|#5        | 3/20 [00:14<01:23,  4.92s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.848
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  20%|##        | 4/20 [00:17<01:08,  4.27s/it][32m[I 2022-09-14 16:40:24,307][0m Trial 43 finished with value: 14.827228522856197 and parameters: {'lambda_l1': 1.33461713512636e-06, 'lambda_l2': 4.31444964199813e-07}. Best is trial 43 with value: 14.827228522856197.[0m
regularization_factors, val_score: 14.827229:  20%|##        | 4/20 [00:17<01:08,  4.27s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  25%|##5       | 5/20 [00:20<00:57,  3.80s/it][32m[I 2022-09-14 16:40:27,278][0m Trial 44 finished with value: 14.861959241933326 and parameters: {'lambda_l1': 1.2831654373934178, 'lambda_l2': 2.432211090343034e-07}. Best is trial 43 with value: 14.827228522856197.[0m
regularization_factors, val_score: 14.827229:  25%|##5       | 5/20 [00:20<00:57,  3.80s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.862
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  30%|###       | 6/20 [00:23<00:47,  3.42s/it][32m[I 2022-09-14 16:40:29,944][0m Trial 45 finished with value: 14.88445936501824 and parameters: {'lambda_l1': 3.4829377948299953, 'lambda_l2': 7.601979659720522e-05}. Best is trial 43 with value: 14.827228522856197.[0m
regularization_factors, val_score: 14.827229:  30%|###       | 6/20 [00:23<00:47,  3.42s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8845
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  35%|###5      | 7/20 [00:25<00:40,  3.10s/it][32m[I 2022-09-14 16:40:32,381][0m Trial 46 finished with value: 14.883582880909438 and parameters: {'lambda_l1': 8.744451249420409e-08, 'lambda_l2': 0.04855124252822208}. Best is trial 43 with value: 14.827228522856197.[0m
regularization_factors, val_score: 14.827229:  35%|###5      | 7/20 [00:25<00:40,  3.10s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8836
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  40%|####      | 8/20 [00:28<00:34,  2.89s/it][32m[I 2022-09-14 16:40:34,834][0m Trial 47 finished with value: 15.360751888671304 and parameters: {'lambda_l1': 4.676130197863228, 'lambda_l2': 6.194077232528561}. Best is trial 43 with value: 14.827228522856197.[0m
regularization_factors, val_score: 14.827229:  40%|####      | 8/20 [00:28<00:34,  2.89s/it]

Early stopping, best iteration is:
[29]	valid_0's rmse: 15.3608
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  45%|####5     | 9/20 [00:30<00:29,  2.70s/it][32m[I 2022-09-14 16:40:37,113][0m Trial 48 finished with value: 14.893847004197719 and parameters: {'lambda_l1': 0.0002748545059222546, 'lambda_l2': 0.0385900959931831}. Best is trial 43 with value: 14.827228522856197.[0m
regularization_factors, val_score: 14.827229:  45%|####5     | 9/20 [00:30<00:29,  2.70s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8938
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  50%|#####     | 10/20 [00:32<00:25,  2.56s/it][32m[I 2022-09-14 16:40:39,357][0m Trial 49 finished with value: 14.847954434165889 and parameters: {'lambda_l1': 4.93855550241683e-07, 'lambda_l2': 0.008855605598523943}. Best is trial 43 with value: 14.827228522856197.[0m
regularization_factors, val_score: 14.827229:  50%|#####     | 10/20 [00:32<00:25,  2.56s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.848
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  55%|#####5    | 11/20 [00:35<00:21,  2.43s/it][32m[I 2022-09-14 16:40:41,493][0m Trial 50 finished with value: 14.82722851716233 and parameters: {'lambda_l1': 1.2623703291028789e-08, 'lambda_l2': 1.0038635156819914e-08}. Best is trial 50 with value: 14.82722851716233.[0m
regularization_factors, val_score: 14.827229:  55%|#####5    | 11/20 [00:35<00:21,  2.43s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  60%|######    | 12/20 [00:37<00:18,  2.35s/it][32m[I 2022-09-14 16:40:43,667][0m Trial 51 finished with value: 14.827228517162863 and parameters: {'lambda_l1': 1.0965673738861967e-08, 'lambda_l2': 1.0152650352852721e-08}. Best is trial 50 with value: 14.82722851716233.[0m
regularization_factors, val_score: 14.827229:  60%|######    | 12/20 [00:37<00:18,  2.35s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  65%|######5   | 13/20 [00:39<00:15,  2.27s/it][32m[I 2022-09-14 16:40:45,752][0m Trial 52 finished with value: 14.82722851682553 and parameters: {'lambda_l1': 1.015168774066269e-08, 'lambda_l2': 1.5689356083438043e-08}. Best is trial 52 with value: 14.82722851682553.[0m
regularization_factors, val_score: 14.827229:  65%|######5   | 13/20 [00:39<00:15,  2.27s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  70%|#######   | 14/20 [00:41<00:13,  2.25s/it][32m[I 2022-09-14 16:40:47,949][0m Trial 53 finished with value: 14.82722851685729 and parameters: {'lambda_l1': 1.7388421643580363e-08, 'lambda_l2': 1.6446493388953197e-08}. Best is trial 52 with value: 14.82722851682553.[0m
regularization_factors, val_score: 14.827229:  70%|#######   | 14/20 [00:41<00:13,  2.25s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  75%|#######5  | 15/20 [00:43<00:11,  2.21s/it][32m[I 2022-09-14 16:40:50,062][0m Trial 54 finished with value: 14.827228592434018 and parameters: {'lambda_l1': 2.182491054228404e-05, 'lambda_l2': 7.895187722115892e-06}. Best is trial 52 with value: 14.82722851682553.[0m
regularization_factors, val_score: 14.827229:  75%|#######5  | 15/20 [00:43<00:11,  2.21s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  80%|########  | 16/20 [00:45<00:08,  2.15s/it][32m[I 2022-09-14 16:40:52,064][0m Trial 55 finished with value: 14.827233565090244 and parameters: {'lambda_l1': 0.016634396003977616, 'lambda_l2': 6.352852458835785e-06}. Best is trial 52 with value: 14.82722851682553.[0m
regularization_factors, val_score: 14.827229:  80%|########  | 16/20 [00:45<00:08,  2.15s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  85%|########5 | 17/20 [00:48<00:07,  2.43s/it][32m[I 2022-09-14 16:40:55,154][0m Trial 56 finished with value: 14.827228519072856 and parameters: {'lambda_l1': 1.1270610346183604e-07, 'lambda_l2': 7.173546951825693e-08}. Best is trial 52 with value: 14.82722851682553.[0m
regularization_factors, val_score: 14.827229:  85%|########5 | 17/20 [00:48<00:07,  2.43s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  90%|######### | 18/20 [00:51<00:04,  2.38s/it][32m[I 2022-09-14 16:40:57,416][0m Trial 57 finished with value: 14.827231829515352 and parameters: {'lambda_l1': 0.010897201065409357, 'lambda_l2': 4.562688904518016e-06}. Best is trial 52 with value: 14.82722851682553.[0m
regularization_factors, val_score: 14.827229:  90%|######### | 18/20 [00:51<00:04,  2.38s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229:  95%|#########5| 19/20 [00:53<00:02,  2.38s/it][32m[I 2022-09-14 16:40:59,793][0m Trial 58 finished with value: 14.82722851769475 and parameters: {'lambda_l1': 9.91067605339776e-08, 'lambda_l2': 5.1144534053431675e-08}. Best is trial 52 with value: 14.82722851682553.[0m
regularization_factors, val_score: 14.827229:  95%|#########5| 19/20 [00:53<00:02,  2.38s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 14.827229: 100%|##########| 20/20 [00:55<00:00,  2.30s/it][32m[I 2022-09-14 16:41:01,925][0m Trial 59 finished with value: 14.827228554321096 and parameters: {'lambda_l1': 2.1798495482139676e-05, 'lambda_l2': 1.8827519084663626e-06}. Best is trial 52 with value: 14.82722851682553.[0m
regularization_factors, val_score: 14.827229: 100%|##########| 20/20 [00:55<00:00,  2.78s/it]


Early stopping, best iteration is:
[25]	valid_0's rmse: 14.8272


min_data_in_leaf, val_score: 14.827229:   0%|          | 0/5 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds


min_data_in_leaf, val_score: 14.827229:  20%|##        | 1/5 [00:02<00:11,  2.93s/it][32m[I 2022-09-14 16:41:04,855][0m Trial 60 finished with value: 14.936518739509394 and parameters: {'min_child_samples': 25}. Best is trial 60 with value: 14.936518739509394.[0m
min_data_in_leaf, val_score: 14.827229:  20%|##        | 1/5 [00:02<00:11,  2.93s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.9365
Training until validation scores don't improve for 100 rounds


min_data_in_leaf, val_score: 14.827229:  40%|####      | 2/5 [00:05<00:07,  2.59s/it][32m[I 2022-09-14 16:41:07,213][0m Trial 61 finished with value: 14.933895545555098 and parameters: {'min_child_samples': 5}. Best is trial 61 with value: 14.933895545555098.[0m
min_data_in_leaf, val_score: 14.827229:  40%|####      | 2/5 [00:05<00:07,  2.59s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.9339
Training until validation scores don't improve for 100 rounds


min_data_in_leaf, val_score: 14.827229:  60%|######    | 3/5 [00:07<00:05,  2.57s/it][32m[I 2022-09-14 16:41:09,754][0m Trial 62 finished with value: 15.034606469534936 and parameters: {'min_child_samples': 50}. Best is trial 61 with value: 14.933895545555098.[0m
min_data_in_leaf, val_score: 14.827229:  60%|######    | 3/5 [00:07<00:05,  2.57s/it]

Early stopping, best iteration is:
[28]	valid_0's rmse: 15.0346
Training until validation scores don't improve for 100 rounds


min_data_in_leaf, val_score: 14.827229:  80%|########  | 4/5 [00:10<00:02,  2.67s/it][32m[I 2022-09-14 16:41:12,576][0m Trial 63 finished with value: 14.916367967292127 and parameters: {'min_child_samples': 10}. Best is trial 63 with value: 14.916367967292127.[0m
min_data_in_leaf, val_score: 14.827229:  80%|########  | 4/5 [00:10<00:02,  2.67s/it]

Early stopping, best iteration is:
[25]	valid_0's rmse: 14.9164
Training until validation scores don't improve for 100 rounds


min_data_in_leaf, val_score: 14.827229: 100%|##########| 5/5 [00:13<00:00,  2.65s/it][32m[I 2022-09-14 16:41:15,198][0m Trial 64 finished with value: 15.565546344413237 and parameters: {'min_child_samples': 100}. Best is trial 63 with value: 14.916367967292127.[0m
min_data_in_leaf, val_score: 14.827229: 100%|##########| 5/5 [00:13<00:00,  2.65s/it]

Early stopping, best iteration is:
[28]	valid_0's rmse: 15.5655
training: rmse=11.097780654985154, mae=7.192521313225311, r2=0.951142098263694
test: rmse=14.827228496098122, mae=10.081967018887479, r2=0.9652191598537031





In [170]:
best_params = model.model.params
print("Best params:", best_params)
print("  Params: ")
for key, value in best_params.items():
    print("    {}: {}".format(key, value))

Best params: {'objective': 'regression', 'metric': 'rmse', 'verbosity': -1, 'boosting_type': 'gbdt', 'categorical_feature': 0, 'feature_pre_filter': False, 'lambda_l1': 0.0, 'lambda_l2': 0.0, 'num_leaves': 31, 'feature_fraction': 0.4, 'bagging_fraction': 0.5269801809894871, 'bagging_freq': 1, 'min_child_samples': 20, 'num_iterations': 10000, 'early_stopping_round': 100}
  Params: 
    objective: regression
    metric: rmse
    verbosity: -1
    boosting_type: gbdt
    categorical_feature: 0
    feature_pre_filter: False
    lambda_l1: 0.0
    lambda_l2: 0.0
    num_leaves: 31
    feature_fraction: 0.4
    bagging_fraction: 0.5269801809894871
    bagging_freq: 1
    min_child_samples: 20
    num_iterations: 10000
    early_stopping_round: 100


In [171]:
predicted_2019 = model.predict(2019)

In [172]:
data.addYearDemandfromForecast(2019, predicted_2019)

In [173]:
data.df_trans

Unnamed: 0,demand_point_index,x_coordinate,y_coordinate,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,0,0.5,0.5,0.352242,0.667932,0.958593,2.911901,4.338274,6.561995,8.454417,10.595324,13.119572,22.199108
1,1,1.5,0.5,0.325940,0.591964,0.862652,2.589068,4.196034,5.745551,8.753195,11.126995,12.020091,21.265157
2,2,2.5,0.5,0.373752,0.591890,0.969733,2.641432,3.541772,5.469161,8.414627,10.115336,14.018254,21.896602
3,3,3.5,0.5,0.420686,0.584055,0.906547,2.378577,3.888121,5.846089,9.083868,12.424885,15.012302,24.230437
4,4,4.5,0.5,0.475621,0.647940,0.981544,2.665400,4.218711,6.776609,8.851107,11.731131,16.355563,24.515097
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4091,4091,59.5,63.5,0.171015,0.334565,0.556055,1.373291,1.837586,2.517146,3.352280,4.149888,5.426193,7.395835
4092,4092,60.5,63.5,0.041716,0.061741,0.131291,0.386540,0.755846,0.941116,1.107797,1.309479,2.057450,6.994499
4093,4093,61.5,63.5,0.100895,0.180352,0.296299,0.705373,1.300220,1.608609,1.822806,2.333681,3.218519,7.474411
4094,4094,62.5,63.5,0.155353,0.290825,0.557803,1.516066,2.399426,2.719197,4.494515,6.096858,6.262574,12.834038


In [174]:
predicted_2020 = model.predict(2020)

In [175]:
data.addYearDemandfromForecast(2020, predicted_2020)

In [176]:
data.df_trans

Unnamed: 0,demand_point_index,x_coordinate,y_coordinate,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,0,0.5,0.5,0.352242,0.667932,0.958593,2.911901,4.338274,6.561995,8.454417,10.595324,13.119572,22.199108,26.918361
1,1,1.5,0.5,0.325940,0.591964,0.862652,2.589068,4.196034,5.745551,8.753195,11.126995,12.020091,21.265157,26.385195
2,2,2.5,0.5,0.373752,0.591890,0.969733,2.641432,3.541772,5.469161,8.414627,10.115336,14.018254,21.896602,26.878407
3,3,3.5,0.5,0.420686,0.584055,0.906547,2.378577,3.888121,5.846089,9.083868,12.424885,15.012302,24.230437,28.356497
4,4,4.5,0.5,0.475621,0.647940,0.981544,2.665400,4.218711,6.776609,8.851107,11.731131,16.355563,24.515097,28.982798
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4091,4091,59.5,63.5,0.171015,0.334565,0.556055,1.373291,1.837586,2.517146,3.352280,4.149888,5.426193,7.395835,12.621361
4092,4092,60.5,63.5,0.041716,0.061741,0.131291,0.386540,0.755846,0.941116,1.107797,1.309479,2.057450,6.994499,11.626224
4093,4093,61.5,63.5,0.100895,0.180352,0.296299,0.705373,1.300220,1.608609,1.822806,2.333681,3.218519,7.474411,11.849275
4094,4094,62.5,63.5,0.155353,0.290825,0.557803,1.516066,2.399426,2.719197,4.494515,6.096858,6.262574,12.834038,18.689469


In [157]:
data.df_trans.to_csv("data/forecast.csv")