In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Set random seed for reproducibility
np.random.seed(0)

# Loading in dataset

In [2]:
df = pd.read_csv('../datasets/complete_data/df.csv')

In [3]:
for i in range(1, 25): # 24 Lags
    df[f'lag_{i}'] = df['Day-ahead Price [EUR/MWh] BZN|NO1'].shift(i)

In [4]:
df = df.dropna()
features = [f'lag_{i}' for i in range(1, 25)]  # 24 Lags
target = 'Day-ahead Price [EUR/MWh] BZN|NO1'

# 80% for training, 10% for validation, 10% for testing
train_size = int(len(df) * 0.8)
val_size = int(len(df) * 0.1)
train, validate, test = np.split(df.sample(frac=1), [train_size, train_size+val_size])

X_train = train[features]
y_train = train[target]
X_val = validate[features]
y_val = validate[target]
X_test = test[features]
y_test = test[target]

In [5]:
parameters = {'kernel': ['linear', 'poly', 'rbf'], 'C':[1.5, 10],'gamma': [1e-7, 1e-4],'epsilon':[0.1,0.2,0.5,0.3]}
svr = SVR(max_iter=1000) # 'max_iter=' controls the amount of iterations.
clf = GridSearchCV(svr, parameters, n_jobs=-1)
clf.fit(X_train, y_train)



GridSearchCV(estimator=SVR(max_iter=1000), n_jobs=-1,
             param_grid={'C': [1.5, 10], 'epsilon': [0.1, 0.2, 0.5, 0.3],
                         'gamma': [1e-07, 0.0001],
                         'kernel': ['linear', 'poly', 'rbf']})

In [6]:
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [7]:
y_train_pred = clf.predict(X_train)

mae = mean_absolute_error(y_train, y_train_pred)
print('Training MAE: ', mae)

mse = mean_squared_error(y_train, y_train_pred)
print('Training MSE: ', mse)

rmse = np.sqrt(mse)
print('Training RMSE: ', rmse)

mape = mean_absolute_percentage_error(y_train, y_train_pred)
print('Training MAPE: ', mape)

Training MAE:  31.079594443440058
Training MSE:  1334.7892358204404
Training RMSE:  36.53476749372357
Training MAPE:  92.72593077984622


In [8]:
y_val_pred = clf.predict(X_val)

mae = mean_absolute_error(y_val, y_val_pred)
print('Validation MAE: ', mae)

mse = mean_squared_error(y_val, y_val_pred)
print('Validation MSE: ', mse)

rmse = np.sqrt(mse)
print('Validation RMSE: ', rmse)

mape = mean_absolute_percentage_error(y_val, y_val_pred)
print('Validation MAPE: ', mape)

Validation MAE:  31.04644023145066
Validation MSE:  1336.6663064682832
Validation RMSE:  36.56044729579062
Validation MAPE:  44.916447463985


In [9]:
y_test_pred = clf.predict(X_test)

mae = mean_absolute_error(y_test, y_test_pred)
print('Test MAE: ', mae)

mse = mean_squared_error(y_test, y_test_pred)
print('Test MSE: ', mse)

rmse = np.sqrt(mse)
print('Test RMSE: ', rmse)

mape = mean_absolute_percentage_error(y_test, y_test_pred)
print('Test MAPE: ', mape)

Test MAE:  32.182616697886516
Test MSE:  1417.0182072861178
Test RMSE:  37.6433022898645
Test MAPE:  84.96033930904906


In [10]:
# Add a naive forecast to your train, validation, and test sets
train['naive_forecast'] = train['Day-ahead Price [EUR/MWh] BZN|NO1'].shift(1)
validate['naive_forecast'] = validate['Day-ahead Price [EUR/MWh] BZN|NO1'].shift(1)
test['naive_forecast'] = test['Day-ahead Price [EUR/MWh] BZN|NO1'].shift(1)

# Evaluate the baseline model on the training set
y_train_naive = train['naive_forecast'].dropna()
y_true_train = train['Day-ahead Price [EUR/MWh] BZN|NO1'][1:]  # Drop the first value to align with the naive forecast

mae = mean_absolute_error(y_true_train, y_train_naive)
print('Training Baseline MAE: ', mae)

mse = mean_squared_error(y_true_train, y_train_naive)
print('Training Baseline MSE: ', mse)

rmse = np.sqrt(mse)
print('Training Baseline RMSE: ', rmse)

mape = mean_absolute_percentage_error(y_true_train, y_train_naive)
print('Training Baseline MAPE: ', mape)

# Evaluate the baseline model on the validation set
y_val_naive = validate['naive_forecast'].dropna()
y_true_val = validate['Day-ahead Price [EUR/MWh] BZN|NO1'][1:]

mae = mean_absolute_error(y_true_val, y_val_naive)
print('Validation Baseline MAE: ', mae)

mse = mean_squared_error(y_true_val, y_val_naive)
print('Validation Baseline MSE: ', mse)

rmse = np.sqrt(mse)
print('Validation Baseline RMSE: ', rmse)

mape = mean_absolute_percentage_error(y_true_val, y_val_naive)
print('Validation Baseline MAPE: ', mape)

# Evaluate the baseline model on the test set
y_test_naive = test['naive_forecast'].dropna()
y_true_test = test['Day-ahead Price [EUR/MWh] BZN|NO1'][1:]

mae = mean_absolute_error(y_true_test, y_test_naive)
print('Test Baseline MAE: ', mae)

mse = mean_squared_error(y_true_test, y_test_naive)
print('Test Baseline MSE: ', mse)

rmse = np.sqrt(mse)
print('Test Baseline RMSE: ', rmse)

mape = mean_absolute_percentage_error(y_true_test, y_test_naive)
print('Test Baseline MAPE: ', mape)

Training Baseline MAE:  108.24721365401993
Training Baseline MSE:  21993.19411973249
Training Baseline RMSE:  148.30102534956558
Training Baseline MAPE:  328.9059809155257
Validation Baseline MAE:  114.14762802018834
Validation Baseline MSE:  24585.645623725624
Validation Baseline RMSE:  156.79810465603728
Validation Baseline MAPE:  172.07996810656684
Test Baseline MAE:  106.40514929060686
Test Baseline MSE:  21340.436886882962
Test Baseline RMSE:  146.08366399732367
Test Baseline MAPE:  280.73046961636464
