In [1]:
zone = 'FI'

In [2]:
import optuna
import pandas as pd
import numpy as np
import os
import glob

from hyperparameters import optimize_hyperparameters
from data import wrangle_data, split_dataframe_by_years, sMAPE

from prediction import predict_over_horizon, predict_over_horizon_hyperparameters, recalibrate_predict_over_horizon, predict_next_day, naive_predict_over_horizon

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

In [3]:
directory = zone
if not os.path.exists(directory):
    os.makedirs(directory)

In [4]:
%%time

df = wrangle_data(source_folder='data', return_dfs=True, store = False, destination_folder=None)[zone]

for test_year in range(2018,2023):
    
    df_train, df_valid, df_test = split_dataframe_by_years(dataframe=df, test_year=test_year, num_validation_years=1, num_train_years=2)
    study_name = zone + '_' + str(test_year)
    optimize_hyperparameters(df_train, df_valid, df_test, study_name=study_name, n_trials=5, n_jobs=4)
    hp_study = optuna.load_study(study_name=study_name, storage='sqlite:///hyperparameter_optimization_trials/' + study_name + '.db')
    hp_params = hp_study.best_params


    preds, metrics = recalibrate_predict_over_horizon(df, df_test, hp_params, calibration_years=3)
    new_metric = sMAPE(preds.values.flatten(), df_test['Price_DA'].values.flatten())


    preds.to_pickle(directory + '/' + str(test_year)  +'_preds.pkl')
    metrics.to_pickle(directory + '/' + str(test_year)  +'_metrics.pkl')


[32m[I 2023-06-12 15:06:09,179][0m Using an existing study with name 'FI_2018' instead of creating a new one.[0m


Directory 'C:\Users\z110474\Time Series Paper\Modular_Codes- V 6.1\hyperparameter_optimization_trials' already exists, using it to store the results of the hyperparameter optimization!


[32m[I 2023-06-12 15:06:20,213][0m Trial 43 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-06-12 15:06:20,423][0m Trial 42 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-06-12 15:06:21,197][0m Trial 41 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-06-12 15:06:26,400][0m Trial 46 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-06-12 15:06:26,465][0m Trial 44 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-06-12 15:06:30,946][0m Trial 45 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-06-12 15:06:31,532][0m Trial 47 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-06-12 15:06:31,633][0m Trial 48 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-06-12 15:06:33,223][0m Trial 49 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-06-12 15:07:16,387][0m Trial 40 finished with value: 3.6705022247654546 and parameters: {'n_hidden': 4, 'learning_rate': 0.002191498554389462, 'batch_size': 42, 'batch_normalization': True, 'use_hist_2':

MAE for Validation Set is: 3.67 | sMAPE for Validation Set is: 10.71% | rMAE for Validation Set is: 0.64
MAE for Test Set is: 8.71 | sMAPE for Test Set is: 19.10% | rMAE for Test Set is: 1.10
for 2018-01-01, MAE is:1.90 & sMAPE is:8.25% & rMAE is:1.01 ||| daily mean of MAE & sMAPE & rMAE till now are :1.90 & 8.25% & 1.01
for 2018-01-02, MAE is:2.11 & sMAPE is:6.50% & rMAE is:0.34 ||| daily mean of MAE & sMAPE & rMAE till now are :2.00 & 7.37% & 0.67
for 2018-01-03, MAE is:2.89 & sMAPE is:9.75% & rMAE is:0.62 ||| daily mean of MAE & sMAPE & rMAE till now are :2.30 & 8.17% & 0.66
for 2018-01-04, MAE is:1.99 & sMAPE is:5.90% & rMAE is:0.64 ||| daily mean of MAE & sMAPE & rMAE till now are :2.22 & 7.60% & 0.65
for 2018-01-05, MAE is:2.67 & sMAPE is:7.92% & rMAE is:1.37 ||| daily mean of MAE & sMAPE & rMAE till now are :2.31 & 7.66% & 0.79
for 2018-01-06, MAE is:2.83 & sMAPE is:9.40% & rMAE is:0.67 ||| daily mean of MAE & sMAPE & rMAE till now are :2.40 & 7.95% & 0.77
for 2018-01-07, MAE is

for 2018-02-20, MAE is:6.29 & sMAPE is:11.57% & rMAE is:0.39 ||| daily mean of MAE & sMAPE & rMAE till now are :5.09 & 11.50% & 0.74
for 2018-02-21, MAE is:5.78 & sMAPE is:9.99% & rMAE is:0.38 ||| daily mean of MAE & sMAPE & rMAE till now are :5.11 & 11.47% & 0.73
for 2018-02-22, MAE is:6.00 & sMAPE is:10.53% & rMAE is:0.38 ||| daily mean of MAE & sMAPE & rMAE till now are :5.12 & 11.45% & 0.72
for 2018-02-23, MAE is:4.31 & sMAPE is:9.01% & rMAE is:0.82 ||| daily mean of MAE & sMAPE & rMAE till now are :5.11 & 11.41% & 0.73
for 2018-02-24, MAE is:3.95 & sMAPE is:9.49% & rMAE is:1.69 ||| daily mean of MAE & sMAPE & rMAE till now are :5.09 & 11.37% & 0.74
for 2018-02-25, MAE is:3.06 & sMAPE is:7.51% & rMAE is:1.19 ||| daily mean of MAE & sMAPE & rMAE till now are :5.05 & 11.30% & 0.75
for 2018-02-26, MAE is:4.67 & sMAPE is:8.84% & rMAE is:0.89 ||| daily mean of MAE & sMAPE & rMAE till now are :5.04 & 11.26% & 0.75
for 2018-02-27, MAE is:5.48 & sMAPE is:9.95% & rMAE is:1.16 ||| daily mean

for 2018-04-23, MAE is:12.07 & sMAPE is:27.05% & rMAE is:1.28 ||| daily mean of MAE & sMAPE & rMAE till now are :5.23 & 10.44% & 1.00
for 2018-04-24, MAE is:4.13 & sMAPE is:11.98% & rMAE is:0.46 ||| daily mean of MAE & sMAPE & rMAE till now are :5.22 & 10.46% & 0.99
for 2018-04-25, MAE is:10.27 & sMAPE is:27.18% & rMAE is:3.16 ||| daily mean of MAE & sMAPE & rMAE till now are :5.27 & 10.60% & 1.01
for 2018-04-26, MAE is:4.84 & sMAPE is:11.42% & rMAE is:1.28 ||| daily mean of MAE & sMAPE & rMAE till now are :5.26 & 10.61% & 1.02
for 2018-04-27, MAE is:12.33 & sMAPE is:23.01% & rMAE is:0.92 ||| daily mean of MAE & sMAPE & rMAE till now are :5.32 & 10.72% & 1.01
for 2018-04-28, MAE is:5.70 & sMAPE is:15.28% & rMAE is:0.79 ||| daily mean of MAE & sMAPE & rMAE till now are :5.33 & 10.75% & 1.01
for 2018-04-29, MAE is:3.50 & sMAPE is:10.61% & rMAE is:2.52 ||| daily mean of MAE & sMAPE & rMAE till now are :5.31 & 10.75% & 1.03
for 2018-04-30, MAE is:6.81 & sMAPE is:18.61% & rMAE is:0.56 ||| d

KeyboardInterrupt: 

In [5]:
files = glob.glob(zone +'/'+"*preds.pkl")
data_frames = []
for file in files:
    with open(file, 'rb') as f:
        data_frames.append(pd.read_pickle(f))

# concatenate all DataFrames in the list into one DataFrame
Predictions = pd.concat(data_frames)

ValueError: No objects to concatenate

In [None]:
files = glob.glob(zone +'/'+"*metrics.pkl")
data_frames = []
for file in files:
    with open(file, 'rb') as f:
        data_frames.append(pd.read_pickle(f))

# concatenate all DataFrames in the list into one DataFrame
Metrics = pd.concat(data_frames)

In [None]:
Predictions.to_csv(directory + '/' + 'Predictions.csv')

In [None]:
Metrics.to_csv(directory + '/' + 'Metrics.csv')

In [None]:
Metrics['sMAPE'].mean()

In [None]:
Metrics['sMAPE'].resample('M').mean().plot()
(100 * df['Price_DA'].loc['2018':'2022'].resample('M').std() / df['Price_DA'].loc['2018':'2022'].resample('M').mean()).plot()

In [None]:
(100 * df['Price_DA'].loc['2018':'2022'].resample('M').std() / df['Price_DA'].loc['2018':'2022'].resample('M').mean()).plot()

In [None]:
df['Price_DA'].loc['2020':'2021']

In [None]:
Predictions['Price_DA'].loc['2020':'2021']

In [None]:
sMAPE(df['Price_DA'].loc['2018':'2019'],Predictions['Price_DA'].loc['2018':'2019'] )

In [None]:
Metrics['MAE'].loc['2020':'2021'].mean()

In [None]:
Metrics[:'2018'].mean()

In [None]:
study_name = zone + '_' + str(2018)

In [None]:
my_study = optuna.load_study(study_name=study_name, storage='sqlite:///hyperparameter_optimization_trials/' + study_name + '.db')

In [None]:
my_study.best_trial.user_attrs

In [None]:
df.loc['2018']['Price_DA']

In [None]:
Predictions.loc['2018']['Price_DA'].values

In [None]:
sMAPE(df.loc['2018']['Price_DA'], Predictions.loc['2018']['Price_DA'].values )

In [None]:
mean_absolute_error(df.loc['2018']['Price_DA'], Predictions.loc['2018']['Price_DA'].values )

In [None]:
test_predictions, test_MAE, test_sMAPE = naive_predict_over_horizon(df=df, horizon=df.loc['2018'])

In [None]:
test_MAE

In [None]:
plt.figure(figsize=(20,10))
plt.plot(df['2018':]['Price_DA'].resample('W').mean())
plt.plot(Predictions['Price_DA'].resample('W').mean())

In [None]:
plt.figure(figsize=(20,10))
plt.plot(df.loc['2022']['Price_DA'])
plt.plot(Predictions.loc['2022']['Price_DA'], alpha=0.5)

In [None]:
Metrics['rMAE'].resample('M').mean().plot()

In [None]:
Metrics['rMAE'].mean()

In [None]:
Predictions.loc['2019-06']

In [None]:
def calculate_smape(actual, forecast):
    # Symmetric Mean Absolute Percentage Error
    return 100 * 2 * np.abs(forecast - actual) / (np.abs(actual) + np.abs(forecast))

In [None]:
errors = pd.DataFrame(index = Predictions.index)

In [None]:
errors['sMAPE'] = [calculate_smape(actual, predicted) for actual, predicted in zip(df['Price_DA'].loc['2018':'2022'], Predictions['Price_DA'])]

In [None]:
errors['sMAPE'].mean()

In [None]:
Metrics['sMAPE'].mean()