In [None]:
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from Energy.HelpFunctions.get_energy_data import get_energy_data, prepare_data
from HelpFunctions.date_and_time import most_recent_thursday, split_time
from Energy.Models.baseline import baseline
from HelpFunctions.calc_score import evaluate_horizon
from Energy.Models.Model1 import model1
from Energy.Models.Model2 import model2

# import importlib
# importlib.reload(Energy.Models)

In [None]:

# Fetch the energy data and pass it straight through the project's
# preparation step; `df` is the frame every later cell starts from.
df = prepare_data(get_energy_data())

# Cross validate baseline model

Remove everything from the most recent Thursday night (12pm) onwards, so that only data before that cutoff remains.

In [None]:
# Exclusive cutoff: rows at or after this timestamp are dropped, so the
# cross-validation frame only holds data strictly before it.
start_date_excl = most_recent_thursday(df)
is_before_cutoff = df.index < start_date_excl
df_cval = df.loc[is_before_cutoff]

In [None]:
# baseline(df_cval, LAST_IDX=-1)
# model1(df_cval)

Repeatedly run the model. Record predictions and true values (observations). Make sure the observations are available for the most recent prediction.

In [None]:
# Number of backtest iterations. Each iteration calls split_time with
# num_weeks=1 on the previous training frame (presumably moving the training
# cutoff one week further back -- confirm against split_time).
NUM_CV_WEEKS = 100
QUANTILE_COLUMNS = ['q0.025', 'q0.25', 'q0.5', 'q0.75', 'q0.975']

# Per-iteration results are collected in a list and concatenated once at the
# end. Growing a DataFrame with pd.concat inside the loop is quadratic and
# relies on pandas ignoring the initial empty frame when inferring dtypes.
weekly_results = []
df_before = df_cval
for w in range(NUM_CV_WEEKS):
    # df_before is both this iteration's training data and the input to the
    # next split, so the training window shrinks with every pass.
    df_before, df_after = split_time(df_before, num_weeks=1)
    pred = baseline(df_before, LAST_IDX=-1)
    # pred = model1(df_before)

    # Look up the observed 'gesamt' value for every forecast date and join it
    # to the predictions on that date.
    obs = pd.DataFrame({'gesamt': df_cval.loc[pred['forecast_date']]['gesamt']})
    pred = pred.set_index('forecast_date')
    merged_df = pd.merge(pred, obs, left_index=True, right_index=True)

    # One score per forecast row: quantile predictions vs. the observation.
    merged_df['score'] = [
        evaluate_horizon(row[QUANTILE_COLUMNS], row['gesamt'])
        for _, row in merged_df.iterrows()
    ]
    weekly_results.append(merged_df)

evaluation = pd.concat(weekly_results)
    

In [None]:
# Show only the rows belonging to the 36-hour horizon.
evaluation.loc[evaluation['horizon'] == '36 hour']

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Kernel density estimate of every score in `evaluation` (all horizons
# pooled). The axis labels describe the plotted quantity instead of the
# template placeholders.
fig, ax = plt.subplots()
sns.kdeplot(evaluation['score'], fill=True, ax=ax)
ax.set(
    xlabel='Score',
    ylabel='Density',
    title='Kernel Density Estimate of Scores',
)
plt.show()

In [None]:
import matplotlib.pyplot as plt

horizons = ['36 hour', '40 hour', '44 hour', '60 hour', '64 hour', '68 hour']

# Horizon shown in this figure; used for both the row filter and the title so
# the two cannot drift apart.
horizon = horizons[0]

# NOTE(review): DataFrame.plot() without `y=` draws every numeric column of
# the filtered frame, not only 'score' -- confirm that this is intended given
# the 'Score' axis label.
ax = evaluation[evaluation['horizon'] == horizon].plot(kind='line', linestyle='-')
ax.set_xlabel('Forecast Date')
ax.set_ylabel('Score')
ax.set_title('Score Over Time: ' + horizon)
ax.set_ylim(0, 80)
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Forecast horizons to compare
horizons = ['36 hour', '40 hour', '44 hour', '60 hour', '64 hour', '68 hour']

# One score line per horizon, all drawn on the same axes.
fig, ax = plt.subplots()
for h in horizons:
    rows_for_horizon = evaluation.loc[evaluation['horizon'] == h]
    ax.plot(rows_for_horizon.index, rows_for_horizon['score'], label=h)

ax.set_xlabel('Forecast Date')
ax.set_ylabel('Score')
ax.set_title('Score Over Time')

# Fixed y-range; scores above the upper bound are cut off in the figure.
ax.set_ylim(0, 55)
ax.legend(title='Horizon')

plt.show()

It can be seen that the heavy outliers occur at the 36 and 44 hour horizons. Are they caused by the type of day, i.e. workdays (where production is still active) versus days on which production is not active?
==> Check for holidays

YES! The orange outliers seem to fall on Karfreitag (Good Friday).

It can also be seen that the 60-68 hour forecasts seem to have less extreme outliers.

In [None]:

def evaluate_models(models, df, last_x, years=False, months=False, weeks=False):
    """Run `evaluate_model` for every entry in `models` and store the result on it.

    Parameters
    ----------
    models : list of dict
        Each dict must have a 'name' key (used in the progress message) and is
        passed unchanged to `evaluate_model`. The result is written back to the
        same dict under the key 'evaluation' (the input is mutated).
    df : DataFrame
        Data handed to `evaluate_model` for every model.
    last_x : int
        Number of iterations, forwarded to `evaluate_model`.
    years, months, weeks : bool
        Selects the step unit. Exactly one must be truthy; the flags are
        converted to 0/1 and forwarded as the step size for that unit.

    Raises
    ------
    ValueError
        If the three flags do not sum to exactly 1.
    """
    unit_flags = (years, months, weeks)
    if sum(unit_flags) != 1:
        raise ValueError("Exactly one of the boolean parameters (years, months, weeks) must be True.")

    # bool -> 0/1: the selected unit gets a step of one, the others zero.
    n_years, n_months, n_weeks = (int(flag) for flag in unit_flags)

    for spec in models:
        print(f'*********** Start the evaluation of Model {spec["name"]} ***********')
        spec['evaluation'] = evaluate_model(spec, df, last_x, n_years, n_months, n_weeks)
        
def evaluate_model(model, df, last_x, years, months, weeks):
    """Repeatedly fit/predict with one model and score each prediction.

    In every iteration the current training frame is passed to `split_time`
    with the given step sizes, the first returned frame becomes the new
    training frame, the model predicts from it, and each forecast row is
    scored against the observed 'gesamt' value at its forecast date.

    Parameters
    ----------
    model : dict
        Must contain 'function': a callable taking the training frame and
        returning a DataFrame with a 'forecast_date' column and the quantile
        columns 'q0.025', 'q0.25', 'q0.5', 'q0.75', 'q0.975'.
    df : DataFrame
        Full data; must contain a 'gesamt' column and be indexable by the
        forecast dates (a missing date makes the `.loc` lookup fail).
    last_x : int
        Number of iterations.
    years, months, weeks : int
        Forwarded to `split_time` as num_years / num_months / num_weeks.

    Returns
    -------
    DataFrame
        All per-iteration results stacked in iteration order, indexed by
        forecast date, with the added columns 'gesamt' and 'score'. Empty if
        `last_x` is 0.
    """
    quantile_columns = ['q0.025', 'q0.25', 'q0.5', 'q0.75', 'q0.975']
    df_before = df
    # Collected per iteration and concatenated once at the end: growing a
    # DataFrame with pd.concat inside the loop is quadratic and relies on
    # pandas ignoring the initial empty frame when inferring dtypes.
    frames = []

    for w in range(last_x):
        print(f'Iteration {w} of {last_x}')
        # Only the first part returned by split_time is used; it is both the
        # training data for this iteration and the input to the next split.
        df_before, _ = split_time(df_before, num_years=years, num_months=months, num_weeks=weeks)
        pred = model['function'](df_before)

        # Observed values at the forecast dates, joined to the predictions.
        obs = pd.DataFrame({'gesamt': df.loc[pred['forecast_date']]['gesamt']})
        merged_df = pd.merge(pred.set_index('forecast_date'), obs, left_index=True, right_index=True)

        # One score per forecast row, assigned as a whole column.
        merged_df['score'] = [
            evaluate_horizon(row[quantile_columns], row['gesamt'])
            for _, row in merged_df.iterrows()
        ]
        frames.append(merged_df)

    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    return pd.concat(frames)

## Test the evaluation with baseline, model1, model2 and model3

In [None]:
from Energy.Models.Model3 import model3

# Models under evaluation: each entry pairs a display name with the callable
# that produces the forecasts.
models = [
    {'name': label, 'function': forecaster}
    for label, forecaster in (
        ('baseline', baseline),
        ('model1', model1),
        ('model2', model2),
        ('model3', model3),
    )
]

# Recompute the exclusive cutoff and the cross-validation frame so this
# section does not depend on the exploratory cells above having been run.
start_date_excl = most_recent_thursday(df)
df_cval = df[df.index < start_date_excl]

In [None]:
# Evaluate every model over 100 weekly steps; each result is stored on the
# model's dict under the key 'evaluation'.
evaluate_models(models, df_cval, 100, weeks=True)

### Save evaluations in pkl file

In [None]:
# Persist the evaluated models so the expensive backtest does not have to be
# re-run. The target directory is created first; otherwise open() fails with
# FileNotFoundError on a fresh checkout and the results are lost.
EVALUATION_PATH = Path('./Model evaluations/baseline_m1_m2_m3.pkl')
EVALUATION_PATH.parent.mkdir(parents=True, exist_ok=True)
with EVALUATION_PATH.open('wb') as f:
    # The dicts also hold the model functions; pickle stores functions by
    # reference, so their modules must be importable when loading.
    pickle.dump(models, f)

# To reload the results later (only unpickle files you created yourself --
# pickle.load can execute arbitrary code):
# with EVALUATION_PATH.open('rb') as f:
#     models = pickle.load(f)

### Create a table that only contains the different scores of the different models

In [None]:
# scores = [m['evaluation']['score'][m['evaluation']['horizon'] == '36 hour'] for m in models]
# horizons = ['36 hour', '40 hour', '44 hour', '60 hour', '64 hour', '68 hour']
# 
# 
# names = [m['name'] for m in models]
# score_df = pd.concat(scores, axis=1,keys=names)

In [None]:
horizons = ['36 hour', '40 hour', '44 hour', '60 hour', '64 hour', '68 hour']

# One (column label, score series) pair per horizon/model combination,
# ordered by horizon first and model second.
labelled_scores = [
    (
        f'{m["name"]}: {h}',
        m['evaluation'].loc[m['evaluation']['horizon'] == h, 'score'],
    )
    for h in horizons
    for m in models
]
names = [label for label, _ in labelled_scores]
scores = [series for _, series in labelled_scores]

# Align all series side by side on their index; `names` become the columns.
score_df = pd.concat(scores, axis=1, keys=names)

        

In [None]:
# Display the combined score table: one column per '<model>: <horizon>'
# pair, indexed by the index of the per-model evaluation frames.
score_df

### Plot the densities of the obtained scores

In [None]:
# Score densities at the 36-hour horizon for the selected models
# (model2 is not included in this comparison).
density_columns = ['baseline: 36 hour', 'model1: 36 hour', 'model3: 36 hour']

fig, ax = plt.subplots()
sns.kdeplot(data=score_df[density_columns], fill=True, ax=ax)
ax.set_xlabel('Score')
ax.set_ylabel('Density')
ax.set_title('Density Plot of Scores: 36 hour horizon')

plt.show()

In [None]:
# Plot values from two columns over time
score_df_36_no_na = score_df[score_df['baseline: 36 hour'].notna()]
# plt.plot(score_df_36_no_na.index, score_df_36_no_na['baseline: 36 hour'], label='baseline')
plt.plot(score_df_36_no_na.index, score_df_36_no_na['model1: 36 hour'], label='model1')
# plt.plot(score_df_36_no_na.index, score_df_36_no_na['model2: 36 hour'], label='model2')
plt.plot(score_df_36_no_na.index, score_df_36_no_na['model3: 36 hour'], label='model3')


# Adding labels and title
plt.xlabel('time')
plt.ylabel('score')
plt.title('Comparison of scores over time')

# Display legend
plt.legend()

# Show the plot
plt.show()