In [1]:
import pandas as pd
import pickle

from sklearn.linear_model import LinearRegression

from functions_new import get_train_test
from functions_new import rolling_window_hourly
from functions_new import backward_selection

from functions_lagtest import TestLags

import warnings
warnings.filterwarnings("ignore")

# Load data

In [2]:
# Load the training data (semicolon-separated CSV).
df = pd.read_csv("data/training_data.csv", sep=";")

# Collapse the per-hour lag columns ('<lag>_h0' ... '<lag>_h23') into a single
# column per lag: every row takes the value of the '_h{hour}' column that
# matches the row's own 'HourCET'.
lag_cols = ['PriceMWh_lag1', 'PriceMWh_lag2', 'PriceMWh_lag7']
for col in lag_cols:
    # Placeholder value; rows whose HourCET is not in 0..23 keep it.
    # A float placeholder lets float lag values be written without an upcast.
    df[col] = 1.0
    for hour in range(24):
        is_hour = df['HourCET'] == hour
        # One .loc assignment instead of chained indexing (df[col][mask] = ...),
        # which may write to a temporary copy and leave df unchanged.
        df.loc[is_hour, col] = df.loc[is_hour, f'{col}_h{hour}']

# TestLags comes from functions_lagtest; see that module for what it checks.
TestLags(df, size_of_test=1000)

# Models

In [3]:
def perform_backward_selection(df_train, X, Y, dummies, train_size):
    """Run `backward_selection` once per hour (0-23) and collect the results.

    For hours 0-4 every candidate whose name contains 'solar' is dropped
    before the selection is run; all other hours start from the full list.

    Args:
        df_train: DataFrame with an 'HourCET' column; the rows of each hour
            are selected with ``df_train['HourCET'] == hour``.
        X: list of candidate column names. It is never handed to
            `backward_selection` directly, so it is left untouched.
        Y: target column(s), forwarded unchanged.
        dummies: forwarded unchanged to `backward_selection`.
        train_size: forwarded unchanged to `backward_selection`.

    Returns:
        A list with 24 entries, the return value of `backward_selection`
        for hour 0, 1, ..., 23 in that order.
    """
    hours_without_solar = range(5)
    cands = []
    for hour in range(24):
        print(f'Hour {hour}')
        if hour in hours_without_solar:
            X_h = [x for x in X if 'solar' not in x]
        else:
            # Hand over a copy: passing X itself would let any in-place
            # change made for one hour leak into later hours and the caller.
            X_h = list(X)
        current_df = df_train.loc[df_train['HourCET'] == hour]
        cand = backward_selection(current_df, X_h, Y, dummies, train_size, print_res=False)
        cands.append(cand)
    return cands

def benchmark1(df_train, df_test, train_size, path):
    """Run the lag-only linear benchmark and pickle predictions with actuals.

    Regressors are the three price lags plus six weekday dummy columns. The
    same feature set is supplied 24 times (presumably one entry per hour;
    `rolling_window_hourly` is defined in functions_new).

    Args:
        df_train: forwarded unchanged to `rolling_window_hourly`.
        df_test: forwarded to `rolling_window_hourly`; its 'PriceMWh' column
            is stored as the actual values.
        train_size: forwarded as `train_size`.
        path: output file, opened in binary write mode.

    Returns:
        None. Writes ``[predictions, actuals]`` (two flattened arrays) to
        `path` with pickle.
    """
    Y = ['PriceMWh']
    # Column names exactly as used in the data. 'Tue' is not in this list
    # (NOTE(review): presumably the reference day; confirm).
    weekday = ['Mon', 'Wen', 'Thur', 'Fri', 'Sat', 'Sun']
    features = ['PriceMWh_lag1', 'PriceMWh_lag2', 'PriceMWh_lag7'] + weekday
    # One independent list per entry, so a change to one entry's features
    # cannot silently change the other 23.
    X = [list(features) for _ in range(24)]

    test_size = 1
    model_lm = LinearRegression()
    preds = rolling_window_hourly(model_lm, df_train, df_test, X, Y, weekday, train_size=train_size, test_size=test_size, print_res=True)
    out = [preds.values.ravel(), df_test[Y].values.ravel()]

    with open(path, 'wb') as fp:
        pickle.dump(out, fp)

def benchmark2(df_train, df_test, train_size, path):
    """Run the lags + load/wind forecast linear benchmark and pickle the result.

    Regressors are the three price lags, 'Grid load forecast DK1',
    'Wind power forecast' and six weekday dummy columns. The same feature
    set is supplied 24 times (presumably one entry per hour;
    `rolling_window_hourly` is defined in functions_new).

    Args:
        df_train: forwarded unchanged to `rolling_window_hourly`.
        df_test: forwarded to `rolling_window_hourly`; its 'PriceMWh' column
            is stored as the actual values.
        train_size: forwarded as `train_size`.
        path: output file, opened in binary write mode.

    Returns:
        None. Writes ``[predictions, actuals]`` (two flattened arrays) to
        `path` with pickle.
    """
    Y = ['PriceMWh']
    # Column names exactly as used in the data. 'Tue' is not in this list
    # (NOTE(review): presumably the reference day; confirm).
    weekday = ['Mon', 'Wen', 'Thur', 'Fri', 'Sat', 'Sun']
    features = ['PriceMWh_lag1', 'PriceMWh_lag2', 'PriceMWh_lag7', 'Grid load forecast DK1', 'Wind power forecast'] + weekday
    # One independent list per entry, so a change to one entry's features
    # cannot silently change the other 23.
    X = [list(features) for _ in range(24)]

    test_size = 1
    model_lm = LinearRegression()
    preds = rolling_window_hourly(model_lm, df_train, df_test, X, Y, weekday, train_size=train_size, test_size=test_size, print_res=True)
    out = [preds.values.ravel(), df_test[Y].values.ravel()]

    with open(path, 'wb') as fp:
        pickle.dump(out, fp)

def benchmark3(df_train, df_test, train_size, path):
    """Run the linear benchmark on all hourly price columns and pickle the result.

    Regressors are every column of `df_train` whose name contains both
    'Price' and '_h' but not 'lag3', followed by 'Grid load forecast DK1',
    'Wind power forecast', six weekday dummy columns and 'Min_lag1'. The
    same feature set is supplied 24 times (presumably one entry per hour;
    `rolling_window_hourly` is defined in functions_new).

    Args:
        df_train: DataFrame whose column names are scanned for the price
            columns; also forwarded to `rolling_window_hourly`.
        df_test: forwarded to `rolling_window_hourly`; its 'PriceMWh' column
            is stored as the actual values.
        train_size: forwarded as `train_size`.
        path: output file, opened in binary write mode.

    Returns:
        None. Writes ``[predictions, actuals]`` (two flattened arrays) to
        `path` with pickle.
    """
    Y = ['PriceMWh']
    # Column names exactly as used in the data. 'Tue' is not in this list
    # (NOTE(review): presumably the reference day; confirm).
    weekday = ['Mon', 'Wen', 'Thur', 'Fri', 'Sat', 'Sun']
    hourly_price_cols = [
        col for col in df_train.columns.values
        if 'Price' in col and '_h' in col and 'lag3' not in col
    ]
    features = hourly_price_cols + ['Grid load forecast DK1', 'Wind power forecast'] + weekday + ['Min_lag1']
    # One independent list per entry, so a change to one entry's features
    # cannot silently change the other 23.
    X = [list(features) for _ in range(24)]

    test_size = 1
    model_lm = LinearRegression()
    preds = rolling_window_hourly(model_lm, df_train, df_test, X, Y, weekday, train_size=train_size, test_size=test_size, print_res=True)
    out = [preds.values.ravel(), df_test[Y].values.ravel()]

    with open(path, 'wb') as fp:
        pickle.dump(out, fp)

def linBW(df_train, df_test, train_size, path, bw_file):
    """Run the linear model on previously selected variables and pickle the result.

    The per-entry variable lists are unpickled from `bw_file`, every 'const'
    entry is removed, and the lists are handed to `rolling_window_hourly`
    together with a `LinearRegression` and all seven weekday column names.

    Args:
        df_train: forwarded unchanged to `rolling_window_hourly`.
        df_test: forwarded to `rolling_window_hourly`; its 'PriceMWh' column
            is stored as the actual values.
        train_size: forwarded as `train_size`.
        path: output file, opened in binary write mode.
        bw_file: pickle file holding an iterable of variable-name iterables.

    Returns:
        None. Writes ``[predictions, actuals]`` (two flattened arrays) to
        `path` with pickle.
    """
    target = ['PriceMWh']

    # NOTE: pickle.load can execute arbitrary code; only read files that
    # were produced by this project.
    with open(bw_file, 'rb') as handle:
        selected = pickle.load(handle)

    # Drop the intercept marker from every variable list.
    X = []
    for hour_vars in selected:
        X.append([name for name in hour_vars if name != 'const'])

    weekday = ['Mon', 'Tue', 'Wen', 'Thur', 'Fri', 'Sat', 'Sun']

    regressor = LinearRegression()
    preds = rolling_window_hourly(
        regressor, df_train, df_test, X, target, weekday,
        train_size=train_size, test_size=1, print_res=True,
    )
    predicted = preds.values.ravel()
    actual = df_test[target].values.ravel()

    with open(path, 'wb') as handle:
        pickle.dump([predicted, actual], handle)

# Period 1: 2019, 2020

In [None]:
# Benchmark 1, period 1.
# start/end are passed to get_train_test (presumably as [year, month, day]).
test_start = [2019,7,1]
test_end = [2021,7,1]
df_train, df_test = get_train_test(df, start=test_start, end=test_end)
train_size = 365*3

# Forward slash instead of 'results\B...': '\B' is an invalid escape sequence
# and the backslash only acts as a separator on Windows.
# The 'results' directory must already exist.
benchmark1(df_train, df_test, train_size, 'results/Benchmark1_Period1_Year3.txt')

In [None]:
# Benchmark 2, period 1.
# start/end are passed to get_train_test (presumably as [year, month, day]).
test_start = [2019,7,1]
test_end = [2021,7,1]
df_train, df_test = get_train_test(df, start=test_start, end=test_end)
train_size = 365*3

# Forward slash instead of 'results\B...': '\B' is an invalid escape sequence
# and the backslash only acts as a separator on Windows.
# The 'results' directory must already exist.
benchmark2(df_train, df_test, train_size, 'results/Benchmark2_Period1_Year3.txt')

In [None]:
# Benchmark 3, period 1.
# start/end are passed to get_train_test (presumably as [year, month, day]).
test_start = [2019,7,1]
test_end = [2021,7,1]
df_train, df_test = get_train_test(df, start=test_start, end=test_end)
train_size = 365*3

# Forward slash instead of 'results\B...': '\B' is an invalid escape sequence
# and the backslash only acts as a separator on Windows.
# The 'results' directory must already exist.
benchmark3(df_train, df_test, train_size, 'results/Benchmark3_Period1_Year3.txt')

In [None]:
# Variable selection, period 1: build the candidate list from column-name
# patterns, run the per-hour backward selection and pickle the result.
test_start = [2019,7,1]
test_end = [2021,7,1]
train_size = 365*3
df_train, df_test = get_train_test(df, start=test_start, end=test_end)

# Substring matches are case-sensitive: "wind" and "Wind" select different columns.
all_columns = df.columns.values
wind_col = [p for p in all_columns if "wind" in p]
solar_col = [p for p in all_columns if "solar" in p]
temp_col = [p for p in all_columns if "temp" in p]
load_col = [p for p in all_columns if "load" in p and '_h' in p]
weekday = ['Mon', 'Tue', 'Wen', 'Thur', 'Fri', 'Sat', 'Sun']
# Was `"Wind" in p in p`: the chained `p in p` is always true, so it is dropped.
windprod_col = [p for p in all_columns if "Wind" in p and '_h' in p]
price_col = [p for p in all_columns if "Price" in p and p != 'PriceMWh' and '_h' in p]
min_col = [p for p in all_columns if "Min" in p]
max_col = [p for p in all_columns if "Max" in p]
avg_col = [p for p in all_columns if "Avg" in p]
X = load_col + price_col + windprod_col + weekday + temp_col + solar_col + wind_col + min_col + max_col + avg_col

Y = ['PriceMWh']
X_bs = perform_backward_selection(df_train, X, Y, weekday, train_size)

# Pickled despite the .txt extension; read back by the linBW cell.
with open('backward_result_2019.txt', 'wb') as fp:
    pickle.dump(X_bs, fp)

In [None]:
# Linear model on the backward-selected variables, period 1.
# Needs 'backward_result_2019.txt' written by the variable-selection cell.
test_start = [2019,7,1]
test_end = [2021,7,1]
df_train, df_test = get_train_test(df, start=test_start, end=test_end)
train_size = 365*3

# Forward slash instead of 'results\L...': '\L' is an invalid escape sequence
# and the backslash only acts as a separator on Windows.
# The 'results' directory must already exist.
linBW(df_train, df_test, train_size, 'results/LinBW_Period1_Year3.txt', 'backward_result_2019.txt')

# Period 2: 2021, 2022
 

In [None]:
# Benchmark 1, period 2.
# start/end are passed to get_train_test (presumably as [year, month, day]).
test_start = [2021,1,1]
test_end = [2023,1,1]
df_train, df_test = get_train_test(df, start=test_start, end=test_end)
train_size = 365*3

# Forward slash instead of 'results\B...': '\B' is an invalid escape sequence
# and the backslash only acts as a separator on Windows.
# The 'results' directory must already exist.
benchmark1(df_train, df_test, train_size, 'results/Benchmark1_Period2_Year3.txt')

In [None]:
# Benchmark 2, period 2.
# start/end are passed to get_train_test (presumably as [year, month, day]).
test_start = [2021,1,1]
test_end = [2023,1,1]
df_train, df_test = get_train_test(df, start=test_start, end=test_end)
train_size = 365*3

# Forward slash instead of 'results\B...': '\B' is an invalid escape sequence
# and the backslash only acts as a separator on Windows.
# The 'results' directory must already exist.
benchmark2(df_train, df_test, train_size, 'results/Benchmark2_Period2_Year3.txt')

In [None]:
# Benchmark 3, period 2.
# start/end are passed to get_train_test (presumably as [year, month, day]).
test_start = [2021,1,1]
test_end = [2023,1,1]
df_train, df_test = get_train_test(df, start=test_start, end=test_end)
train_size = 365*3

# Forward slash instead of 'results\B...': '\B' is an invalid escape sequence
# and the backslash only acts as a separator on Windows.
# The 'results' directory must already exist.
benchmark3(df_train, df_test, train_size, 'results/Benchmark3_Period2_Year3.txt')

In [None]:
# Variable selection, period 2: build the candidate list from column-name
# patterns, run the per-hour backward selection and pickle the result.
test_start = [2021,1,1]
test_end = [2023,1,1]
train_size = 365*3
df_train, df_test = get_train_test(df, start=test_start, end=test_end)

# Substring matches are case-sensitive: "wind" and "Wind" select different columns.
all_columns = df.columns.values
wind_col = [p for p in all_columns if "wind" in p]
solar_col = [p for p in all_columns if "solar" in p]
temp_col = [p for p in all_columns if "temp" in p]
load_col = [p for p in all_columns if "load" in p and '_h' in p]
weekday = ['Mon', 'Tue', 'Wen', 'Thur', 'Fri', 'Sat', 'Sun']
# Was `"Wind" in p in p`: the chained `p in p` is always true, so it is dropped.
windprod_col = [p for p in all_columns if "Wind" in p and '_h' in p]
price_col = [p for p in all_columns if "Price" in p and p != 'PriceMWh' and '_h' in p]
min_col = [p for p in all_columns if "Min" in p]
max_col = [p for p in all_columns if "Max" in p]
avg_col = [p for p in all_columns if "Avg" in p]
X = load_col + price_col + windprod_col + weekday + temp_col + solar_col + wind_col + min_col + max_col + avg_col

Y = ['PriceMWh']
X_bs = perform_backward_selection(df_train, X, Y, weekday, train_size)

# Pickled despite the .txt extension; read back by the linBW cell.
with open('backward_result_2021.txt', 'wb') as fp:
    pickle.dump(X_bs, fp)

In [None]:
# Linear model on the backward-selected variables, period 2.
# Needs 'backward_result_2021.txt' written by the variable-selection cell.
test_start = [2021,1,1]
test_end = [2023,1,1]
df_train, df_test = get_train_test(df, start=test_start, end=test_end)
train_size = 365*3

# Forward slash instead of 'results\L...': '\L' is an invalid escape sequence
# and the backslash only acts as a separator on Windows.
# The 'results' directory must already exist.
linBW(df_train, df_test, train_size, 'results/LinBW_Period2_Year3.txt', 'backward_result_2021.txt')