In [1]:
import pandas as pd
import numpy as np
import pickle

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_squared_error as MSE

from lightgbm import LGBMRegressor

from skopt import BayesSearchCV
from mccv import MonteCarloCV

from functions_new import get_train_test
from functions_new import rolling_window_grid

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

# Load data

In [2]:
# Read the semicolon-separated training data and one-hot encode the HourCET
# column into "Hour_<value>" indicator columns in a single expression.
df = pd.read_csv("data/training_data.csv", sep=";").pipe(
    pd.get_dummies,
    columns=["HourCET"],
    prefix="Hour",
)

# Models

In [8]:
def LightGBM_all(search, df_train, df_test, train_size, path, get_counts=True, hyp_opt_freq=60):
    """Run the rolling-window LightGBM forecast on every feature group and pickle the results.

    The feature list is built from name patterns over the columns of
    ``df_train`` (load, lagged price, wind production, weekday dummies,
    temperature, solar, wind, Min/Max/Avg aggregates and hour dummies) and
    handed to ``rolling_window_grid`` together with the target ``PriceMWh``.

    Parameters
    ----------
    search
        Hyper-parameter search object, forwarded unchanged to
        ``rolling_window_grid``.
    df_train, df_test : pandas.DataFrame
        Training and test frames, forwarded to ``rolling_window_grid``.
        ``df_test`` must contain ``PriceMWh``; ``df_train`` supplies the
        column names the feature patterns are matched against.
    train_size
        Forwarded to ``rolling_window_grid``.
    path : str
        File that receives the pickled ``[predictions, actuals]`` pair.
        With ``get_counts`` enabled, three more files are written whose names
        are ``path`` prefixed with ``results\\coef_``,
        ``results\\training_set_`` and ``results\\cv_``.
    get_counts : bool
        Forwarded to ``rolling_window_grid``. When true that function is
        expected to return ``(preds, counts, scores, cv_res)``, otherwise
        only ``preds``.
    hyp_opt_freq
        Forwarded to ``rolling_window_grid``.

    Returns
    -------
    The predictions returned by ``rolling_window_grid``.
    """
    import os  # local import: only used for the directory handling in dump()

    # Match against the frame that was passed in rather than a notebook-level
    # global, so the function does not depend on hidden kernel state.
    columns = list(df_train.columns.values)

    def matching(*fragments, exclude=()):
        """Columns whose name contains every fragment and is not in ``exclude``."""
        return [c for c in columns if c not in exclude and all(f in c for f in fragments)]

    def dump(obj, target):
        """Pickle ``obj`` to ``target``, creating the parent directory if the path names one."""
        folder = os.path.dirname(target)
        if folder:
            os.makedirs(folder, exist_ok=True)
        with open(target, 'wb') as fp:
            pickle.dump(obj, fp)

    wind_col = matching("wind")
    solar_col = matching("solar")
    temp_col = matching("temp")
    load_col = matching("load", "_h")
    # Kept verbatim (including the 'Wen' / 'Thur' spellings): presumably these
    # are the exact weekday dummy column names in the data - confirm.
    weekday = ['Mon', 'Tue', 'Wen', 'Thur', 'Fri', 'Sat', 'Sun']
    windprod_col = matching("Wind", "_h")
    # The target itself must never be used as a feature.
    price_col = matching("Price", "_h", exclude=('PriceMWh',))
    min_col = matching("Min")
    max_col = matching("Max")
    avg_col = matching("Avg")
    hour_col = matching("Hour_")

    X = load_col + price_col + windprod_col + weekday + temp_col + solar_col + wind_col + min_col + max_col + avg_col + hour_col

    test_size = 1
    Y = ['PriceMWh']
    dummies = weekday + hour_col
    # Features without the weekday/hour indicator columns; how they are used
    # is decided inside rolling_window_grid.
    X_no_dummies = [x for x in X if x not in dummies]

    result = rolling_window_grid(search, df_train, df_test, X, Y, X_no_dummies, train_size=train_size, test_size=test_size, hyp_opt_freq=hyp_opt_freq, get_counts=get_counts)
    if get_counts:
        preds, counts, scores, cv_res = result
    else:
        preds = result

    dump([preds.ravel(), df_test[Y].values.ravel()], path)

    if get_counts:
        # Raw strings keep the backslashes literal without relying on invalid
        # escape sequences. NOTE(review): the prefix is put in front of the
        # whole ``path`` (directory part included), so a ``path`` that already
        # starts with ``results\`` yields a nested name - confirm intended.
        dump([counts, X], r'results\coef_' + path)
        dump(scores, r'results\training_set_' + path)
        dump(cv_res, r'results\cv_' + path)

    return preds

def LightGBM_nw(search, df_train, df_test, train_size, path, hyp_opt_freq=60):
    """Run the rolling-window LightGBM forecast without the weather features and pickle the results.

    The feature list is built from name patterns over the columns of
    ``df_train`` (load, lagged price, wind production, weekday dummies,
    Min/Max/Avg aggregates and hour dummies); the ``temp``, ``solar`` and
    ``wind`` column groups are left out. The target is ``PriceMWh``.

    Parameters
    ----------
    search
        Hyper-parameter search object, forwarded unchanged to
        ``rolling_window_grid``.
    df_train, df_test : pandas.DataFrame
        Training and test frames, forwarded to ``rolling_window_grid``.
        ``df_test`` must contain ``PriceMWh``; ``df_train`` supplies the
        column names the feature patterns are matched against.
    train_size
        Forwarded to ``rolling_window_grid``.
    path : str
        File that receives the pickled ``[predictions, actuals]`` pair.
    hyp_opt_freq
        Forwarded to ``rolling_window_grid``.

    Returns
    -------
    The predictions returned by ``rolling_window_grid``.
    """
    # Match against the frame that was passed in rather than a notebook-level
    # global, so the function does not depend on hidden kernel state.
    columns = list(df_train.columns.values)

    def matching(*fragments, exclude=()):
        """Columns whose name contains every fragment and is not in ``exclude``."""
        return [c for c in columns if c not in exclude and all(f in c for f in fragments)]

    load_col = matching("load", "_h")
    # Kept verbatim (including the 'Wen' / 'Thur' spellings): presumably these
    # are the exact weekday dummy column names in the data - confirm.
    weekday = ['Mon', 'Tue', 'Wen', 'Thur', 'Fri', 'Sat', 'Sun']
    windprod_col = matching("Wind", "_h")
    # The target itself must never be used as a feature.
    price_col = matching("Price", "_h", exclude=('PriceMWh',))
    min_col = matching("Min")
    max_col = matching("Max")
    avg_col = matching("Avg")
    hour_col = matching("Hour_")

    X = load_col + price_col + windprod_col + weekday + min_col + max_col + avg_col + hour_col

    test_size = 1
    Y = ['PriceMWh']
    dummies = weekday + hour_col
    # Features without the weekday/hour indicator columns; how they are used
    # is decided inside rolling_window_grid.
    X_no_dummies = [x for x in X if x not in dummies]

    # NOTE(review): get_counts is left at rolling_window_grid's default; the
    # code below assumes that call returns only the predictions - confirm.
    preds = rolling_window_grid(search, df_train, df_test, X, Y, X_no_dummies, train_size=train_size, test_size=test_size, hyp_opt_freq=hyp_opt_freq)

    with open(path, 'wb') as fp:
        pickle.dump([preds.ravel(), df_test[Y].values.ravel()], fp)

    return preds


# Grid Search

In [4]:
# Exhaustive grid over tree-shape and ensemble-size settings
# (3 * 4 * 3 * 4 = 144 combinations).
parameters = {
    'max_depth': [3, 7, 16],
    'n_estimators': [500, 1000, 1500, 2000],
    'num_leaves': [16, 31, 50],
    'min_child_samples': [20, 50, 100, 150],
}

# Monte Carlo cross-validation with two splits.
# NOTE(review): gap=168 equals 7 * 24 - presumably one week of hourly rows
# between the training and validation parts; confirm.
mc_cross_val = MonteCarloCV(n_splits=2, train_size=0.6, test_size=0.1, gap=168)

# GOSS-boosted LightGBM regressor with fixed regularisation and learning rate;
# only the entries of `parameters` are varied by the search.
model_lgbm = LGBMRegressor(
    boosting_type="goss",
    learning_rate=0.05,
    reg_alpha=0.1,
    reg_lambda=0.1,
    saved_feature_importance_type=1,
    n_jobs=-1,
)

# No `scoring` is given, so candidates are ranked by the estimator's own
# `score` method. Note that `model_lgbm`, `mc_cross_val` and `parameters` are
# rebound by the Bayes-search cell further down; `grid_search` keeps the
# objects created here.
grid_search = GridSearchCV(
    estimator=model_lgbm,
    param_grid=parameters,
    cv=mc_cross_val,
    n_jobs=-1,
    verbose=0,
)

# Bayes search

In [5]:
# GOSS-boosted LightGBM regressor; every other hyper-parameter of interest is
# left to the Bayesian search below.
model_lgbm = LGBMRegressor(
    n_jobs=-1,
    boosting_type="goss",
    saved_feature_importance_type=1,
)

# Monte Carlo cross-validation with four splits.
# NOTE(review): gap=168 equals 7 * 24 - presumably one week of hourly rows
# between the training and validation parts; confirm.
mc_cross_val = MonteCarloCV(
    n_splits=4,
    train_size=0.6,
    test_size=0.1,
    gap=168,
)

# Search space: (low, high) or (low, high, prior) per hyper-parameter.
# 'reg_lambda' was previously listed twice in this dict literal; Python keeps
# only the last value for a repeated key, so the uniform (0.0001, 1) range was
# never used. Only the effective log-uniform range is kept here.
parameters = {
    'learning_rate': (0.0001, 0.1, "log-uniform"),
    'max_depth': (4, 50),
    'n_estimators': (100, 2000),
    'num_leaves': (8, 50),
    'min_child_samples': (20, 200),
    'reg_lambda': (1e-5, 0.999, "log-uniform"),
    'reg_alpha': (1e-5, 0.999, "log-uniform"),
}

# 70 search iterations, each candidate scored by negative mean absolute error
# on the Monte Carlo splits.
# NOTE(review): no random_state is set, so repeated runs may pick different
# hyper-parameters.
bayes_search = BayesSearchCV(
    estimator=model_lgbm,
    n_iter=70,
    search_spaces=parameters,
    n_jobs=-1,
    cv=mc_cross_val,
    verbose=0,
    scoring='neg_mean_absolute_error',
)

# Period 1

In [None]:
# Period 1, all features (weather included).
# NumPy 1.24 removed the `np.int` alias; it is restored here before the search
# runs. NOTE(review): presumably required by the installed scikit-optimize
# version - confirm, and consider moving this next to the imports so the later
# cells do not depend on this one having been executed.
np.int = np.int_

test_start = [2019,7,1]
test_end = [2021,7,1]
df_train, df_test = get_train_test(df, start=test_start, end=test_end)
train_size = 365*3  # NOTE(review): presumably three years expressed in days - confirm the unit
# Raw string: the backslash is a literal path separator, not an escape sequence.
preds = LightGBM_all(bayes_search, df_train, df_test, train_size, r'results\lightGBMallbayes_Period1_Year3.txt', get_counts=True, hyp_opt_freq=183)

# Actual vs. forecast price over the test set.
Y = ["PriceMWh"]
fig, ax = plt.subplots()
ax.plot(range(len(df_test[Y])), df_test[Y], label="Actual")
ax.plot(range(len(preds)), preds, label="Forecast")
ax.set(title="Period 1 - all features", xlabel="Test observation", ylabel="PriceMWh")
ax.legend()
plt.show()

print('MAE', MAE(y_true = df_test[Y], y_pred = preds))
print('MSE', MSE(y_true = df_test[Y], y_pred =  preds))

In [None]:
# Period 1, no weather features.
# NOTE(review): `np.int = np.int_` is only assigned in the Period 1 "all data"
# cell; if the search needs that alias, that cell has to be run first.
test_start = [2019,7,1]
test_end = [2021,7,1]
df_train, df_test = get_train_test(df, start=test_start, end=test_end)
train_size = 365*3  # NOTE(review): presumably three years expressed in days - confirm the unit
# Raw string: the backslash is a literal path separator, not an escape sequence.
preds = LightGBM_nw(bayes_search, df_train, df_test, train_size, r'results\lightGBMnwbayes_Period1_Year3.txt', hyp_opt_freq=183)

# Actual vs. forecast price over the test set.
Y = ["PriceMWh"]
fig, ax = plt.subplots()
ax.plot(range(len(df_test[Y])), df_test[Y], label="Actual")
ax.plot(range(len(preds)), preds, label="Forecast")
ax.set(title="Period 1 - no weather features", xlabel="Test observation", ylabel="PriceMWh")
ax.legend()
plt.show()

print('MAE', MAE(y_true = df_test[Y], y_pred = preds))
print('MSE', MSE(y_true = df_test[Y], y_pred =  preds))

# Period 2: 2021, 2022
 

In [None]:
# Period 2, all features (weather included).
# NOTE(review): `np.int = np.int_` is only assigned in the Period 1 "all data"
# cell; if the search needs that alias, that cell has to be run first.
test_start = [2021,1,1]
test_end = [2023,1,1]
df_train, df_test = get_train_test(df, start=test_start, end=test_end)
train_size = 365*3  # NOTE(review): presumably three years expressed in days - confirm the unit
# Raw string: the backslash is a literal path separator, not an escape sequence.
preds = LightGBM_all(bayes_search, df_train, df_test, train_size, r'results\lightGBMallbayes_Period2_Year3.txt', get_counts=True, hyp_opt_freq=183)

# Actual vs. forecast price over the test set.
Y = ["PriceMWh"]
fig, ax = plt.subplots()
ax.plot(range(len(df_test[Y])), df_test[Y], label="Actual")
ax.plot(range(len(preds)), preds, label="Forecast")
ax.set(title="Period 2 - all features", xlabel="Test observation", ylabel="PriceMWh")
ax.legend()
plt.show()

print('MAE', MAE(y_true = df_test[Y], y_pred = preds))
print('MSE', MSE(y_true = df_test[Y], y_pred =  preds))

In [None]:
# Period 2, no weather features.
# NOTE(review): `np.int = np.int_` is only assigned in the Period 1 "all data"
# cell; if the search needs that alias, that cell has to be run first.
test_start = [2021,1,1]
test_end = [2023,1,1]
df_train, df_test = get_train_test(df, start=test_start, end=test_end)
train_size = 365*3  # NOTE(review): presumably three years expressed in days - confirm the unit
# Raw string: the backslash is a literal path separator, not an escape sequence.
preds = LightGBM_nw(bayes_search, df_train, df_test, train_size, r'results\lightGBMnwbayes_Period2_Year3.txt', hyp_opt_freq=183)

# Actual vs. forecast price over the test set.
Y = ["PriceMWh"]
fig, ax = plt.subplots()
ax.plot(range(len(df_test[Y])), df_test[Y], label="Actual")
ax.plot(range(len(preds)), preds, label="Forecast")
ax.set(title="Period 2 - no weather features", xlabel="Test observation", ylabel="PriceMWh")
ax.legend()
plt.show()

print('MAE', MAE(y_true = df_test[Y], y_pred = preds))
print('MSE', MSE(y_true = df_test[Y], y_pred =  preds))