In [1]:
import pandas as pd
import numpy as np
from catboost import Pool, CatBoostRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import TimeSeriesSplit 
import optuna 
import json
from sklearn.metrics import mean_absolute_error

In [2]:
#Load inn datasets
X_test  = pd.read_parquet('../data/prepared_datasets/avg/no_duplicates/X_test.parquet')
X_train = pd.read_parquet('../data/prepared_datasets/avg/no_duplicates/X_train.parquet')
y_train = pd.read_parquet('../data/prepared_datasets/avg/no_duplicates/Y_train.parquet')
y_train_a = pd.read_parquet('../data/prepared_datasets/avg/no_duplicates/Y_train_a.parquet')
y_train_b = pd.read_parquet('../data/prepared_datasets/avg/no_duplicates/Y_train_b.parquet')
y_train_c = pd.read_parquet('../data/prepared_datasets/avg/no_duplicates/Y_train_c.parquet')

In [4]:
def test_train_split(df):
    dates_2 = (df.index >= '2023-04-01') & (df.index <= '2023-04-15')
    dates_1 = (df.index >= '2021-05-01') & (df.index <= '2021-08-01')

    test_set = df[dates_1 | dates_2]

    training_set = df[~(dates_1 | dates_2)]

    X_train = training_set.drop("pv_measurement", axis=1)
    y_train = training_set['pv_measurement']

    X_test = test_set.drop("pv_measurement", axis=1)
    y_test = test_set['pv_measurement'] 

    
    
    return X_train, X_test, y_train, y_test


X_train_new_c, X_test_new_c, y_train_new_c, y_test_c = test_train_split(pd.concat([X_train[X_train["location"] == "C"].drop("location", axis=1), y_train_c], axis=1))

In [10]:
#Create a pool of data

train_pool_c = Pool(X_train_new_c, y_train_new_c)
test_pool_c = Pool(X_test_new_c) 

In [None]:
#For location A
def objective(trial, X_train, y_train):
    params = {
        "iterations": trial.suggest_int("iterations", 300, 3500),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "depth": trial.suggest_int("depth", 1, 13),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
        "l2_leaf_reg": trial.suggest_int("l2_leaf_reg", 2, 10),
        "bagging_temperature": trial.suggest_float("bagging_temperature", 0.3, 1.0),
        "random_strength": trial.suggest_float("random_strength", 0.3, 1.0),
        "border_count": trial.suggest_int("border_count", 1, 1000),
        "rsm": trial.suggest_float("rsm", 0.05, 1),
        "loss_function" :"LogCosh"
    }

    catboost_model_c = CatBoostRegressor(**params, verbose=400)
    catboost_model_c.fit(train_pool_c)
    pred_c = pd.DataFrame(catboost_model_c.predict(test_pool_c))
    MAE_c = mean_absolute_error(y_test_c, pred_c)
    return MAE_c

study = optuna.create_study(direction='minimize')
study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=80)

[I 2023-11-11 08:18:37,898] A new study created in memory with name: no-name-0d54271e-d2c9-4a01-9fa8-616a6ec70d34


0:	learn: 69.1701711	total: 76.3ms	remaining: 2m 45s
400:	learn: 21.6919578	total: 18.1s	remaining: 1m 19s
800:	learn: 18.9206608	total: 40.8s	remaining: 1m 9s
1200:	learn: 18.1874123	total: 1m 2s	remaining: 50.5s
1600:	learn: 17.6436143	total: 1m 25s	remaining: 30.2s
2000:	learn: 17.0561885	total: 1m 40s	remaining: 8.35s


[I 2023-11-11 08:20:23,327] Trial 0 finished with value: 51.61922475340231 and parameters: {'iterations': 2168, 'learning_rate': 0.007142459071760093, 'depth': 7, 'min_data_in_leaf': 73, 'l2_leaf_reg': 2, 'bagging_temperature': 0.46273908508719064, 'random_strength': 0.9000985661208238, 'border_count': 749, 'rsm': 0.23373756721140937}. Best is trial 0 with value: 51.61922475340231.


2167:	learn: 16.7986986	total: 1m 45s	remaining: 0us
0:	learn: 69.3358464	total: 245ms	remaining: 12m 24s
400:	learn: 38.0233280	total: 1m 50s	remaining: 12m 10s
800:	learn: 24.4912015	total: 3m 42s	remaining: 10m 23s
1200:	learn: 19.1323570	total: 5m 33s	remaining: 8m 32s
1600:	learn: 16.7640812	total: 7m 34s	remaining: 6m 49s
2000:	learn: 15.3228009	total: 9m 27s	remaining: 4m 55s
2400:	learn: 14.3653308	total: 11m 23s	remaining: 3m 3s
2800:	learn: 13.6761732	total: 13m 19s	remaining: 1m 9s
3043:	learn: 13.3014562	total: 14m 27s	remaining: 0us


[I 2023-11-11 08:34:52,668] Trial 1 finished with value: 51.832130599402234 and parameters: {'iterations': 3044, 'learning_rate': 0.0021194842149737, 'depth': 12, 'min_data_in_leaf': 71, 'l2_leaf_reg': 2, 'bagging_temperature': 0.9283436649217105, 'random_strength': 0.7868782124894353, 'border_count': 570, 'rsm': 0.9513351080571983}. Best is trial 0 with value: 51.61922475340231.


0:	learn: 66.6555073	total: 12ms	remaining: 37.6s
400:	learn: 20.6050719	total: 8.04s	remaining: 55s
800:	learn: 19.7547703	total: 16.9s	remaining: 49.5s
1200:	learn: 19.3185754	total: 25.4s	remaining: 41.1s
1600:	learn: 18.9612937	total: 33.7s	remaining: 32.5s
2000:	learn: 18.6808021	total: 41.9s	remaining: 23.9s
2400:	learn: 18.4679440	total: 50.1s	remaining: 15.5s
2800:	learn: 18.2922576	total: 57.7s	remaining: 7.07s


[I 2023-11-11 08:35:57,336] Trial 2 finished with value: 53.61146806294108 and parameters: {'iterations': 3144, 'learning_rate': 0.08779518397559141, 'depth': 2, 'min_data_in_leaf': 73, 'l2_leaf_reg': 3, 'bagging_temperature': 0.5126984950230389, 'random_strength': 0.6556409400458487, 'border_count': 160, 'rsm': 0.2849848611222509}. Best is trial 0 with value: 51.61922475340231.


3143:	learn: 18.1811535	total: 1m 4s	remaining: 0us
0:	learn: 69.2921052	total: 50.3ms	remaining: 2m 16s
400:	learn: 29.8470681	total: 18.9s	remaining: 1m 49s
800:	learn: 20.6342614	total: 37.9s	remaining: 1m 30s
1200:	learn: 18.2365912	total: 57.1s	remaining: 1m 12s
1600:	learn: 17.3705663	total: 1m 16s	remaining: 53.5s
2000:	learn: 16.8028501	total: 1m 35s	remaining: 34.3s
2400:	learn: 16.3334937	total: 1m 54s	remaining: 15.3s


[I 2023-11-11 08:38:08,671] Trial 3 finished with value: 51.21798717701078 and parameters: {'iterations': 2721, 'learning_rate': 0.0034564289907322923, 'depth': 9, 'min_data_in_leaf': 57, 'l2_leaf_reg': 3, 'bagging_temperature': 0.6236337597257187, 'random_strength': 0.7939176933471583, 'border_count': 537, 'rsm': 0.3974485101227929}. Best is trial 3 with value: 51.21798717701078.


2720:	learn: 15.9675289	total: 2m 10s	remaining: 0us
0:	learn: 65.7095562	total: 17.8ms	remaining: 47.5s
400:	learn: 18.1357372	total: 9.08s	remaining: 51.5s
800:	learn: 16.6731535	total: 18.7s	remaining: 43.7s
1200:	learn: 15.7377542	total: 28.2s	remaining: 34.6s
1600:	learn: 15.0779551	total: 38s	remaining: 25.6s
2000:	learn: 14.5350697	total: 47.8s	remaining: 16.1s
2400:	learn: 14.0650632	total: 57.6s	remaining: 6.62s


[I 2023-11-11 08:39:13,636] Trial 4 finished with value: 53.3170944213525 and parameters: {'iterations': 2677, 'learning_rate': 0.08329840920474495, 'depth': 4, 'min_data_in_leaf': 98, 'l2_leaf_reg': 3, 'bagging_temperature': 0.6644671892277159, 'random_strength': 0.6298408356078586, 'border_count': 853, 'rsm': 0.4316600138110592}. Best is trial 3 with value: 51.21798717701078.


2676:	learn: 13.7469550	total: 1m 4s	remaining: 0us
0:	learn: 68.1571426	total: 33.9ms	remaining: 1m 10s
400:	learn: 18.3376790	total: 12s	remaining: 49.8s
800:	learn: 16.8411852	total: 24.4s	remaining: 38.7s
1200:	learn: 15.6438034	total: 35.9s	remaining: 26.1s
1600:	learn: 14.7558591	total: 47.3s	remaining: 13.9s
2000:	learn: 14.1104200	total: 59.4s	remaining: 2.14s


[I 2023-11-11 08:40:15,477] Trial 5 finished with value: 51.781136578294266 and parameters: {'iterations': 2073, 'learning_rate': 0.027475768133830555, 'depth': 6, 'min_data_in_leaf': 16, 'l2_leaf_reg': 7, 'bagging_temperature': 0.408327826016915, 'random_strength': 0.7597076195006005, 'border_count': 814, 'rsm': 0.3585290064429964}. Best is trial 3 with value: 51.21798717701078.


2072:	learn: 13.9812242	total: 1m 1s	remaining: 0us
0:	learn: 69.4675960	total: 14ms	remaining: 14.8s
400:	learn: 62.2217749	total: 5.84s	remaining: 9.66s
800:	learn: 56.3947877	total: 11.7s	remaining: 3.85s


[I 2023-11-11 08:40:31,458] Trial 6 finished with value: 177.43874490808574 and parameters: {'iterations': 1064, 'learning_rate': 0.00152338724396836, 'depth': 1, 'min_data_in_leaf': 64, 'l2_leaf_reg': 4, 'bagging_temperature': 0.46082440198944324, 'random_strength': 0.8164814428310097, 'border_count': 508, 'rsm': 0.06290626488013214}. Best is trial 3 with value: 51.21798717701078.


1063:	learn: 52.7489057	total: 15.8s	remaining: 0us
0:	learn: 69.3114467	total: 83.6ms	remaining: 2m 19s
400:	learn: 34.3730138	total: 41.4s	remaining: 2m 11s
800:	learn: 22.3288902	total: 1m 23s	remaining: 1m 31s
1200:	learn: 18.4731441	total: 2m 6s	remaining: 50.2s
1600:	learn: 16.7824567	total: 2m 52s	remaining: 8.06s
1675:	learn: 16.5650828	total: 3m	remaining: 0us


[I 2023-11-11 08:43:32,505] Trial 7 finished with value: 52.53820149041374 and parameters: {'iterations': 1676, 'learning_rate': 0.00262436531793633, 'depth': 11, 'min_data_in_leaf': 33, 'l2_leaf_reg': 3, 'bagging_temperature': 0.4664851553017465, 'random_strength': 0.9861565785346675, 'border_count': 683, 'rsm': 0.5530304774514476}. Best is trial 3 with value: 51.21798717701078.


0:	learn: 66.8354985	total: 19ms	remaining: 21.9s
400:	learn: 19.4331222	total: 9.44s	remaining: 17.8s
800:	learn: 18.5074811	total: 19.2s	remaining: 8.58s


[I 2023-11-11 08:44:00,002] Trial 8 finished with value: 52.5344224139224 and parameters: {'iterations': 1158, 'learning_rate': 0.07201329697225572, 'depth': 3, 'min_data_in_leaf': 80, 'l2_leaf_reg': 6, 'bagging_temperature': 0.8763244105390795, 'random_strength': 0.5912327045889827, 'border_count': 73, 'rsm': 0.7540520137413443}. Best is trial 3 with value: 51.21798717701078.


1157:	learn: 18.0007569	total: 27.3s	remaining: 0us
0:	learn: 67.4738965	total: 23ms	remaining: 34.5s
400:	learn: 14.1766456	total: 11.9s	remaining: 32.7s
800:	learn: 11.3617008	total: 23.9s	remaining: 20.8s
1200:	learn: 9.5074544	total: 36.2s	remaining: 8.97s


[I 2023-11-11 08:44:45,548] Trial 9 finished with value: 52.58163616705161 and parameters: {'iterations': 1499, 'learning_rate': 0.04923308171574376, 'depth': 8, 'min_data_in_leaf': 98, 'l2_leaf_reg': 2, 'bagging_temperature': 0.6081396312283999, 'random_strength': 0.6946859571715414, 'border_count': 95, 'rsm': 0.7753608565860737}. Best is trial 3 with value: 51.21798717701078.


1498:	learn: 8.5694567	total: 45.2s	remaining: 0us
0:	learn: 69.1214084	total: 33.5ms	remaining: 1m 29s
400:	learn: 20.4781608	total: 17.1s	remaining: 1m 37s
800:	learn: 16.4570287	total: 34.3s	remaining: 1m 20s
1200:	learn: 15.0765816	total: 56.8s	remaining: 1m 9s
1600:	learn: 14.0886780	total: 1m 21s	remaining: 54.7s
2000:	learn: 13.1347211	total: 1m 39s	remaining: 33.8s
2400:	learn: 12.1947648	total: 1m 58s	remaining: 13.7s
2677:	learn: 11.6984747	total: 2m 12s	remaining: 0us


[I 2023-11-11 08:46:58,780] Trial 10 finished with value: 50.93136577721838 and parameters: {'iterations': 2678, 'learning_rate': 0.006288123719275324, 'depth': 10, 'min_data_in_leaf': 48, 'l2_leaf_reg': 10, 'bagging_temperature': 0.7589104593031024, 'random_strength': 0.44608116404729853, 'border_count': 281, 'rsm': 0.55398403369607}. Best is trial 10 with value: 50.93136577721838.


0:	learn: 69.1701358	total: 53.4ms	remaining: 2m 16s
400:	learn: 21.9477793	total: 20.9s	remaining: 1m 52s
800:	learn: 17.0550397	total: 41.9s	remaining: 1m 31s
1200:	learn: 15.7286196	total: 1m 4s	remaining: 1m 12s
1600:	learn: 14.7974296	total: 1m 26s	remaining: 51.4s
2000:	learn: 13.9129363	total: 1m 48s	remaining: 29.9s
2400:	learn: 13.0819340	total: 2m 10s	remaining: 8.33s
2553:	learn: 12.7316446	total: 2m 19s	remaining: 0us


[I 2023-11-11 08:49:18,757] Trial 11 finished with value: 51.4355990320293 and parameters: {'iterations': 2554, 'learning_rate': 0.005449996393476758, 'depth': 10, 'min_data_in_leaf': 46, 'l2_leaf_reg': 10, 'bagging_temperature': 0.7516557492689095, 'random_strength': 0.4372424679772966, 'border_count': 354, 'rsm': 0.5723759174973126}. Best is trial 10 with value: 50.93136577721838.


0:	learn: 68.7768173	total: 48.8ms	remaining: 2m 46s
400:	learn: 17.4817265	total: 19.4s	remaining: 2m 25s
800:	learn: 15.5526864	total: 39.4s	remaining: 2m 8s
1200:	learn: 14.1867015	total: 59.5s	remaining: 1m 49s
1600:	learn: 12.9696139	total: 1m 19s	remaining: 1m 29s
2000:	learn: 11.7862526	total: 1m 39s	remaining: 1m 10s
2400:	learn: 10.8493967	total: 2m	remaining: 50.7s
2800:	learn: 10.0856049	total: 2m 20s	remaining: 30.7s
3200:	learn: 9.4328327	total: 2m 41s	remaining: 10.7s
3412:	learn: 9.0112072	total: 3m	remaining: 0us


[I 2023-11-11 08:52:19,984] Trial 12 finished with value: 51.70252412644067 and parameters: {'iterations': 3413, 'learning_rate': 0.011785451205830492, 'depth': 9, 'min_data_in_leaf': 47, 'l2_leaf_reg': 10, 'bagging_temperature': 0.7820675235533736, 'random_strength': 0.3188999636065907, 'border_count': 294, 'rsm': 0.4648389640513674}. Best is trial 10 with value: 50.93136577721838.


0:	learn: 69.2739616	total: 675ms	remaining: 28m 59s
400:	learn: 25.1525517	total: 4m 37s	remaining: 25m 5s
800:	learn: 16.2599921	total: 9m 14s	remaining: 20m 29s
1200:	learn: 13.4234099	total: 13m 54s	remaining: 15m 56s
1600:	learn: 11.8951239	total: 18m 57s	remaining: 11m 34s
2000:	learn: 10.8496756	total: 24m 4s	remaining: 6m 56s
2400:	learn: 9.9871905	total: 29m 8s	remaining: 2m 8s
2577:	learn: 9.5828475	total: 31m 25s	remaining: 0us


[I 2023-11-11 09:23:47,799] Trial 13 finished with value: 51.93016767060407 and parameters: {'iterations': 2578, 'learning_rate': 0.0038845662497108266, 'depth': 13, 'min_data_in_leaf': 28, 'l2_leaf_reg': 8, 'bagging_temperature': 0.625019549695535, 'random_strength': 0.5049297490421663, 'border_count': 987, 'rsm': 0.6124464442656238}. Best is trial 10 with value: 50.93136577721838.


0:	learn: 69.4155002	total: 65.8ms	remaining: 22.3s


[I 2023-11-11 09:24:05,608] Trial 14 finished with value: 183.62674491298944 and parameters: {'iterations': 340, 'learning_rate': 0.001156428366466753, 'depth': 6, 'min_data_in_leaf': 5, 'l2_leaf_reg': 5, 'bagging_temperature': 0.9965434481935553, 'random_strength': 0.5312172987202003, 'border_count': 336, 'rsm': 0.3999758382646914}. Best is trial 10 with value: 50.93136577721838.


339:	learn: 53.8926233	total: 17.6s	remaining: 0us
0:	learn: 68.7966060	total: 67.7ms	remaining: 3m 15s
400:	learn: 16.7937204	total: 43.6s	remaining: 4m 31s
800:	learn: 14.6344491	total: 1m 27s	remaining: 3m 49s
1200:	learn: 12.7500964	total: 2m 12s	remaining: 3m 6s
1600:	learn: 11.3188212	total: 2m 57s	remaining: 2m 23s


In [7]:
#to output the best paramaters
print(study.best_params)

#to output the best score returned from the trials
print(study.best_value)

with open("optuna-best-parameters_c.txt", "w") as file:
    file.write("Best paramaters: \n")
    file.write(json.dumps(study.best_params))  # Write the first string followed by a newline character
    file.write("\n")
    file.write("best score MAE: \n")
    file.write(json.dumps(study.best_value))  # Write the second string followed by a newline character

{'iterations': 2004, 'learning_rate': 0.016429110886618033, 'depth': 8, 'colsample_bylevel': 0.7338575760364532, 'min_data_in_leaf': 90, 'l2_leaf_reg': 5, 'has-time': False, 'bagging_temperature': 0.6414768311294344, 'random_strength': 0.4206698081688342, 'border_count': 487, 'rsm': 0.012360675671015169, 'nan_mode': 'min'}
57.13529571086957
