In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
import lightgbm as lgb

# Global plotting look-and-feel: seaborn 'darkgrid' theme with a light grey
# axes background and slightly darker grid lines.
sns.set_style(style='darkgrid', rc={'axes.facecolor': '.9', 'grid.color': '.8'})

# Make 'deep' the active palette and keep its colours in `sns_c` so later
# cells can pick specific colours by index.
sns.set_palette(palette='deep')
sns_c = sns.color_palette(palette='deep')

# Default figure geometry and background for every matplotlib figure.
plt.rcParams.update({
    "figure.figsize": [15, 5],
    "figure.dpi": 100,
    "figure.facecolor": "white",
})

%matplotlib inline

In [2]:
# Load the preprocessed dataset from a pickle file in the working directory.
# NOTE(review): unpickling can execute arbitrary code - only load this file
# if it comes from a trusted source.
dataset_path = '20231108_Dataset_Processed.pkl'
data = pd.read_pickle(dataset_path)

# Preview the first rows as the cell output.
data.head()

Unnamed: 0_level_0,transformer_load,radiation,windspeed_10m,windspeed_100m,windspeed_squall,nedu_e1a,nedu_e1b,nedu_e1c,nedu_e2a,nedu_e2b,...,day_of_year_cos,transformer_load_lag_2days,transformer_load_lag_5days,transformer_load_lag_7days,windspeed_100m_3h_mean,windspeed_100m_1d_mean,windspeed_squall_3h_mean,windspeed_squall_1d_mean,radiation_3h_mean,radiation_1d_mean
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-04-09 10:15:00+00:00,0.655497,2139711.0,6.068184,7.706377,10.432041,2.2e-05,1.6e-05,1.2e-05,3e-05,2.6e-05,...,-0.133015,0.860733,0.149738,0.314834,7.85674,8.482728,10.248645,8.407537,1700433.0,780098.818258
2021-04-09 10:30:00+00:00,0.676091,2240682.0,6.103702,7.747967,10.514462,2.2e-05,1.5e-05,1.1e-05,3e-05,2.5e-05,...,-0.133015,0.897033,0.15986,0.455148,7.846817,8.470331,10.310933,8.393179,1777003.0,782144.193583
2021-04-09 10:45:00+00:00,0.660384,2341653.0,6.141287,7.791777,10.601013,2.1e-05,1.4e-05,1.1e-05,2.9e-05,2.4e-05,...,-0.133015,0.907155,0.17836,0.492845,7.834948,8.457775,10.36411,8.378342,1853714.0,783613.980855
2021-04-09 11:00:00+00:00,0.660035,2442624.0,6.180901,7.837771,10.691594,2e-05,1.3e-05,1e-05,2.9e-05,2.4e-05,...,-0.133015,0.932984,0.172775,0.479581,7.819994,8.445067,10.406397,8.363042,1930565.0,784508.180074
2021-04-09 11:15:00+00:00,0.68726,2466027.0,6.159403,7.807181,10.73414,1.9e-05,1.2e-05,9e-06,2.9e-05,2.4e-05,...,-0.133015,0.929145,0.180803,0.51274,7.805399,8.434417,10.449004,8.348299,2006537.0,785131.799703


In [3]:
# Split the dataset into train, validation, and test sets.
# shuffle=False keeps the original row order, so the last rows become the
# test set and the rows just before them become the validation set.
train_size = 0.75
val_size = 0.15
test_size = 0.10

# The three fractions are expressed relative to the full dataset.
assert abs(train_size + val_size + test_size - 1.0) < 1e-9, \
    "train/val/test fractions must sum to 1"

# First split: carve the test set off the end of the data.
train_val, test = train_test_split(data, test_size=test_size, shuffle=False)

# Second split: carve the validation set off the end of the remainder.
# `train_val` only holds (train_size + val_size) of the data, so the
# validation fraction has to be rescaled relative to that remainder
# (0.15 / 0.90), not derived from `test_size`.
val_fraction = val_size / (train_size + val_size)
train, val = train_test_split(train_val, test_size=val_fraction, shuffle=False)

In [4]:
fig, ax = plt.subplots()

# Draw the target series of each split in its own palette colour.
split_series = [
    ('Training Set', train, sns_c[0]),
    ('Validation Set', val, sns_c[1]),
    ('Test Set', test, sns_c[2]),
]
for split_label, split_frame, split_color in split_series:
    sns.lineplot(
        data=split_frame['transformer_load'],
        ax=ax,
        label=split_label,
        color=split_color,
    )

# Dashed vertical markers at the first index value of the validation and
# test sets, i.e. where each hold-out period begins.
for boundary in (val.index[0], test.index[0]):
    ax.axvline(boundary, color='black', ls='--')

# Legend, title and axis labels.
ax.legend()
ax.set_title('Data Splitting')
ax.set_xlabel('Date')
ax.set_ylabel('Transformer Load')
plt.show()

In [None]:
# Baseline LightGBM hyperparameters shared by every model trained below.
# The quantile level itself is not set here.
lgb_params = dict(
    # Loss and evaluation metric
    objective='quantile',
    metric='quantile',
    # Booster type and random seed
    boosting_type='gbdt',
    seed=42,
    # Tree growth
    learning_rate=0.1,
    num_leaves=16,
    max_depth=-1,
    min_data_in_leaf=20,
    min_gain_to_split=0,
    # Row / column subsampling
    bagging_fraction=0.9,
    bagging_freq=1,
    feature_fraction=0.9,
    # Regularisation
    lambda_l1=0.01,
    lambda_l2=0.01,
    # Passed as the string 'true', exactly as in the original configuration
    force_col_wise='true',
)

In [None]:
# Preparing the data for LightGBM: every column except `target` is used as a
# feature, and `target` is the label.
target = 'transformer_load'
train_data = lgb.Dataset(train.drop(columns=[target]), label=train[target])

# Validation and test datasets reference the training dataset, as the
# LightGBM documentation recommends for hold-out data, so they are
# constructed consistently with the training set.
val_data = lgb.Dataset(val.drop(columns=[target]), label=val[target], reference=train_data)
test_data = lgb.Dataset(test.drop(columns=[target]), label=test[target], reference=train_data)

In [None]:
# Fit one LightGBM booster per quantile level and collect the boosters in
# `models`, keyed by the quantile value.
quantiles = [0.05, 0.5, 0.95]
models = {}

for quantile in quantiles:
    print(f"Training model for quantile: {quantile}")

    # The quantile level is written into the shared parameter dict, so
    # `lgb_params` keeps the last trained quantile after the loop finishes.
    lgb_params['alpha'] = quantile

    # Callbacks are built fresh for every model: stop after 40 rounds
    # without improvement and log the evaluation every 50 rounds.
    stop_early = lgb.early_stopping(stopping_rounds=40)
    log_progress = lgb.log_evaluation(50)

    model = lgb.train(
        params=lgb_params,
        train_set=train_data,
        num_boost_round=400,
        valid_sets=[train_data, val_data],
        callbacks=[stop_early, log_progress],
    )
    models[quantile] = model