# Imports

In [1]:
import inspect
import time
import warnings
from datetime import datetime as dt
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
warnings.filterwarnings("ignore")

from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')
plt.style.use('seaborn-colorblind')
sns.set_theme()
%matplotlib inline
params = {'legend.fontsize': 'x-large',
          'figure.figsize': (20, 10),
          'figure.titlesize': 'x-large',
          'figure.dpi': 150,
         'axes.labelsize': 'x-large',
         'axes.titlesize': 'x-large',
         'xtick.labelsize': 'x-large',
         'ytick.labelsize': 'x-large',
         'savefig.bbox': 'tight'}

plt.rcParams.update(params)
# plt.rcParams.keys() ### Use to check the available parameters

In [2]:
# Execute the two project scripts so that the names they define are loaded
# into this notebook's namespace for the cells below.
# NOTE(review): `mean_squared_error` and `calculate_mape` are used later in
# this notebook without a visible import; presumably they are provided by one
# of these scripts -- confirm, and prefer explicit imports if so.
%run ts_processing.py
%run ts_modelling.py

In [3]:
# Load the spot-price CSV, discard the 'Unnamed: 0' column (presumably a
# saved index -- confirm) and parse the 'datetime' column into timestamps.
df = (
    pd.read_csv("NordPool/Spot_Price_Nordics.csv")
    .drop(columns='Unnamed: 0')
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
)


In [4]:
# Show the column labels of the loaded frame.
df.columns

Index(['NO1', 'datetime', 'NO3', 'NO5', 'SE1', 'SE3', 'SE4', 'DK1', 'DK2',
       'weekday', 'week', 'day', 'hour'],
      dtype='object')

# Calculate Baselines

In [5]:
######### Prepare dataframe to get Baselines #########
# Calendar features derived from the timestamp. These are written onto `df`
# itself ('hour' and 'day' overwrite the columns already present in the CSV).
df["hour"] = df["datetime"].dt.hour
df["dayofweek"] = df["datetime"].dt.weekday
df["month"] = df["datetime"].dt.month
df["day"] = df["datetime"].dt.day

# Price columns the baseline is computed for.
price_columns = ['NO1', 'NO3', 'NO5', 'SE1', 'SE3', 'SE4', 'DK1', 'DK2']

# Baseline: mean price per (day of month, hour of day) for each column.
# NOTE(review): the mean is taken over every row currently in `df`; if the
# baseline is later scored on a subset of these same rows it will already
# have seen them -- confirm this is intended.
base2 = df.groupby(["day", "hour"])[price_columns].mean()

# pandas does not create missing directories on to_csv, so make sure the
# output folder exists before writing.
baseline_csv = Path("Plots/Baselines/base2_nordics.csv")
baseline_csv.parent.mkdir(parents=True, exist_ok=True)
base2.to_csv(baseline_csv)
base2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,NO1,NO3,NO5,SE1,SE3,SE4,DK1,DK2
day,hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,0,40.086452,30.625,40.114355,30.434839,32.594516,37.783871,41.172903,41.54
1,1,40.017581,30.596613,40.029516,29.885484,31.612097,34.996774,38.985968,37.168387
1,2,38.541129,29.421774,38.543871,28.219355,29.916774,32.71629,37.292742,34.785645
1,3,37.593065,28.82,37.600645,27.510645,29.147419,31.725161,35.764516,33.408226
1,4,36.285968,28.803065,36.390484,27.425968,28.982742,31.589194,34.824194,33.115484


In [6]:
# Attach the per-(day, hour) baseline means to every row of `df`; baseline
# columns receive the '_base' suffix while the original columns keep their
# names. Note that `base2` is rebound here from the baseline table to the
# merged frame, so this cell must follow a fresh run of the cell that builds
# the baseline table.
base2 = df.merge(
    base2,
    on=["day", "hour"],
    how="left",
    suffixes=(None, '_base'),
)
base2.head()

Unnamed: 0,NO1,datetime,NO3,NO5,SE1,SE3,SE4,DK1,DK2,weekday,...,dayofweek,month,NO1_base,NO3_base,NO5_base,SE1_base,SE3_base,SE4_base,DK1_base,DK2_base
0,29.36,2017-01-02 00:00:00,29.36,29.36,29.36,29.36,29.36,29.36,29.36,0,...,0,1,39.615079,29.565397,39.615079,29.513175,30.706667,36.988095,40.761746,40.021587
1,29.4,2017-01-02 01:00:00,29.4,29.4,29.4,29.4,29.4,29.4,29.4,0,...,0,1,39.423968,28.775556,39.439524,27.759683,29.884127,33.922857,39.053175,37.401905
2,28.91,2017-01-02 02:00:00,28.91,28.91,28.91,28.91,28.91,28.91,28.91,0,...,0,1,38.679524,28.151905,38.695079,26.495556,28.447937,31.341746,36.206825,34.674921
3,28.11,2017-01-02 03:00:00,28.11,28.11,28.11,28.11,28.11,28.11,28.11,0,...,0,1,38.012857,27.985873,38.040794,26.214921,28.173016,30.74746,35.155873,33.894603
4,28.24,2017-01-02 04:00:00,27.89,28.24,27.89,27.89,27.89,27.89,27.89,0,...,0,1,37.873175,28.317778,37.913492,26.580317,28.616825,31.115238,35.018571,34.038095


In [7]:
# Show the column labels of the merged frame (originals plus '*_base').
base2.columns

Index(['NO1', 'datetime', 'NO3', 'NO5', 'SE1', 'SE3', 'SE4', 'DK1', 'DK2',
       'weekday', 'week', 'day', 'hour', 'dayofweek', 'month', 'NO1_base',
       'NO3_base', 'NO5_base', 'SE1_base', 'SE3_base', 'SE4_base', 'DK1_base',
       'DK2_base'],
      dtype='object')

# Define comparison period

In [8]:
# Show the last rows of `df` to see where the data ends.
df.tail()

Unnamed: 0,NO1,datetime,NO3,NO5,SE1,SE3,SE4,DK1,DK2,weekday,week,day,hour,dayofweek,month
45499,204.15,2022-03-12 19:00:00,14.09,204.15,17.03,204.15,204.15,210.25,210.25,5,10,12,19,5,3
45500,198.64,2022-03-12 20:00:00,14.14,198.64,16.08,198.64,198.64,204.13,198.64,5,10,12,20,5,3
45501,183.74,2022-03-12 21:00:00,14.24,183.74,15.82,168.85,168.85,173.0,170.43,5,10,12,21,5,3
45502,182.15,2022-03-12 22:00:00,14.24,182.15,15.59,123.77,123.77,176.5,153.95,5,10,12,22,5,3
45503,182.95,2022-03-12 23:00:00,14.34,182.95,15.55,103.01,103.01,182.51,173.39,5,10,12,23,5,3


In [9]:
# Comparison window = the trailing test_size + train_size rows of `df`
# (original note: last period, 2021-12-17 00:00:00 to 2022-03-12 23:00:00).
test_size = 4 * 24    # 96 rows, i.e. 4 days
train_size = 82 * 24  # 1968 rows, i.e. 82 days
window_start = len(df) - (test_size + train_size)
df.iloc[window_start:].tail()

Unnamed: 0,NO1,datetime,NO3,NO5,SE1,SE3,SE4,DK1,DK2,weekday,week,day,hour,dayofweek,month
45499,204.15,2022-03-12 19:00:00,14.09,204.15,17.03,204.15,204.15,210.25,210.25,5,10,12,19,5,3
45500,198.64,2022-03-12 20:00:00,14.14,198.64,16.08,198.64,198.64,204.13,198.64,5,10,12,20,5,3
45501,183.74,2022-03-12 21:00:00,14.24,183.74,15.82,168.85,168.85,173.0,170.43,5,10,12,21,5,3
45502,182.15,2022-03-12 22:00:00,14.24,182.15,15.59,123.77,123.77,176.5,153.95,5,10,12,22,5,3
45503,182.95,2022-03-12 23:00:00,14.34,182.95,15.55,103.01,103.01,182.51,173.39,5,10,12,23,5,3


In [10]:
### Slice data
# Restrict both frames to the trailing comparison window. The cut is
# positional, so it relies on `df` and `base2` having the same row order.
window = test_size + train_size
df = df.iloc[len(df) - window:]
base2 = base2.iloc[len(base2) - window:]

In [11]:
# Show the first rows of the sliced merged frame (start of the window).
base2.head()

Unnamed: 0,NO1,datetime,NO3,NO5,SE1,SE3,SE4,DK1,DK2,weekday,...,dayofweek,month,NO1_base,NO3_base,NO5_base,SE1_base,SE3_base,SE4_base,DK1_base,DK2_base
43440,145.1,2021-12-17 00:00:00,19.95,145.1,19.95,19.95,19.95,149.44,19.95,4,...,4,12,39.615,29.164355,39.054194,29.934194,30.548871,33.930806,41.363548,38.497097
43441,160.36,2021-12-17 01:00:00,19.36,160.36,19.36,19.36,19.36,165.15,19.36,4,...,4,12,39.684194,28.599355,39.677097,29.131935,29.392903,33.688226,37.209194,34.675968
43442,156.53,2021-12-17 02:00:00,19.06,156.53,19.06,19.06,19.06,161.2,19.06,4,...,4,12,38.673548,27.840968,38.672097,28.135806,28.165484,31.754516,36.022419,32.022419
43443,153.76,2021-12-17 03:00:00,19.15,153.76,19.15,19.15,19.15,158.35,19.15,4,...,4,12,37.941774,27.202903,37.939194,27.415161,27.442258,30.895645,35.160484,30.966774
43444,152.6,2021-12-17 04:00:00,18.79,152.6,18.79,18.79,18.79,157.15,18.79,4,...,4,12,37.472097,26.820161,37.468548,27.02371,27.042903,30.410806,34.601129,30.369032


In [12]:
# Score the baseline per column over the rows currently in `base2`:
# RMSE and MAPE of the '<region>_base' column against the '<region>' column.
# `mean_squared_error` and `calculate_mape` are not defined in this cell.
regions = ['NO1', 'NO3', 'NO5', 'SE1', 'SE3', 'SE4', 'DK1', 'DK2']
rmse, mape = {}, {}
for reg in regions:
    actual = base2[reg]
    baseline = base2[f'{reg}_base']
    rmse[reg] = np.sqrt(mean_squared_error(actual, baseline))
    mape[reg] = calculate_mape(actual, baseline)
    print(reg, rmse[reg], mape[reg])
# Original note: check that it is not in the 0.01 range (presumably that MAPE
# is expressed in percent, not as a fraction -- confirm).
mape.keys()

NO1 122.4345037056856 69.50783911830935
NO3 19.714497004255136 89.44661550355232
NO5 120.89955834442601 69.51610077721381
SE1 26.33479735745647 89.38347780113773
SE3 112.12479721294436 76.24773745736145
SE4 118.27712601416202 79.87433181155023
DK1 151.9717543214473 inf
DK2 150.88168814230036 447.9740294263943


dict_keys(['NO1', 'NO3', 'NO5', 'SE1', 'SE3', 'SE4', 'DK1', 'DK2'])

In [13]:
# Display the per-region RMSE dictionary.
rmse

{'NO1': 122.4345037056856,
 'NO3': 19.714497004255136,
 'NO5': 120.89955834442601,
 'SE1': 26.33479735745647,
 'SE3': 112.12479721294436,
 'SE4': 118.27712601416202,
 'DK1': 151.9717543214473,
 'DK2': 150.88168814230036}

In [42]:
# Display the per-region MAPE dictionary.
mape

{'NO1': 69.50783911830935,
 'NO3': 89.44661550355232,
 'NO5': 69.51610077721381,
 'SE1': 89.38347780113773,
 'SE3': 76.24773745736145,
 'SE4': 79.87433181155023,
 'DK1': inf,
 'DK2': 447.9740294263943}