In [None]:
from HelpFunctions.date_and_time import most_recent_thursday, split_time
from DAX.HelpFunctions.get_dax_data import get_dax_data
from datetime import timedelta
from HelpFunctions.calc_score import evaluate_horizon
from HelpFunctions.mix_models import mix_models
import pandas as pd
from DAX.Models.baseline import baseline
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
# import importlib
# importlib.reload(Energy.Models)

In [None]:
# Load the DAX data via the project helper; `df` is the frame the cells below
# slice (df_cval) and pass to evaluate_models.
df = get_dax_data()

In [None]:
# Build the cross-validation frame: keep only rows strictly earlier than one
# day before the date returned by most_recent_thursday(df). Presumably that
# cut-off is a Wednesday, so the last retained row is Tuesday's data — TODO
# confirm against most_recent_thursday.
# NOTE(review): df_cval is not referenced by any later cell visible in this
# notebook (evaluate_models is called with `df`) — confirm which is intended.
start_date_excl = most_recent_thursday(df) - timedelta(days=1)
df_cval = df.loc[df.index < start_date_excl]
# Replace the index by its `.date` values (assumes a DatetimeIndex — TODO confirm).
df_cval.index = df_cval.index.date

In [None]:
# Show the last rows of the cross-validation frame to check the cut-off by eye.
df_cval.tail()

In [None]:
def evaluate_models(models, df, last_x, years=False, months=False, weeks=False):
    """Evaluate every model in ``models`` and attach the result to it.

    Args:
        models: Iterable of dicts. Each dict needs a ``'name'`` entry (used in
            the progress message) and whatever ``evaluate_model`` reads from
            it. The evaluation is written to the dict's ``'evaluation'`` key,
            so the dicts are modified in place.
        df: Data frame handed unchanged to ``evaluate_model``.
        last_x: Number of evaluation iterations, handed to ``evaluate_model``.
        years, months, weeks: Period selectors. Exactly one must be ``True``;
            each is converted with ``int()`` (so the selected one becomes 1,
            the others 0) before being passed on.

    Raises:
        ValueError: If the three selectors do not sum to exactly 1.

    Returns:
        None.
    """
    period_flags = (years, months, weeks)
    if sum(period_flags) != 1:
        raise ValueError("Exactly one of the boolean parameters (years, months, weeks) must be True.")

    # Turn the flags into the integer step sizes evaluate_model expects.
    n_years, n_months, n_weeks = (int(flag) for flag in period_flags)

    for model in models:
        print(f'*********** Start the evaluation of Model {model["name"]} ***********')
        model['evaluation'] = evaluate_model(model, df, last_x, n_years, n_months, n_weeks)
        
def evaluate_model(model, df, last_x, years, months, weeks):
    """Back-test a single model over ``last_x`` successive splits of ``df``.

    In every iteration ``split_time`` is applied to the frame kept from the
    previous iteration (starting with ``df``); the first element it returns is
    used as the model input and carried into the next iteration (presumably
    the data before the cut-off — confirm against ``split_time``).

    Args:
        model: Dict with a ``'function'`` entry. If that entry is callable it
            is called with the current input frame; otherwise its elements
            ``[0]`` and ``[1]`` are passed to ``mix_models`` with
            ``target='DAX'``. The prediction must contain a
            ``'forecast_date'`` column and the quantile columns ``q0.025``,
            ``q0.25``, ``q0.5``, ``q0.75`` and ``q0.975``.
        df: Full data frame. Observations are looked up here, in column
            ``ret<k>`` where ``k`` is the 1-based position of the forecast
            row within the prediction.
        last_x: Number of splits to evaluate.
        years, months, weeks: Passed to ``split_time`` as ``num_years``,
            ``num_months`` and ``num_weeks``.

    Returns:
        pandas.DataFrame: All predictions, indexed by forecast date and sorted
        by that index, with two added columns ``'observation'`` and
        ``'score'``. Forecast dates that are not in ``df.index`` get NaN in
        both columns, so a later ``dropna()`` removes them. An empty frame is
        returned when ``last_x`` is 0.
    """
    quantile_cols = ['q0.025', 'q0.25', 'q0.5', 'q0.75', 'q0.975']
    missing = float('nan')

    df_before = df
    per_split = []

    for w in range(last_x):
        print(f'Iteration {w+1} of {last_x}')
        df_before, _ = split_time(df_before, num_years=years, num_months=months, num_weeks=weeks)

        # A non-callable 'function' entry describes a mixed model (two parts).
        forecaster = model['function']
        if callable(forecaster):
            pred = forecaster(df_before)
        else:
            pred = mix_models(forecaster[0], forecaster[1], df_before, target='DAX')

        # Some forecast dates have no observation (e.g. 05-01 is missing).
        # Those rows get NaN rather than '' so the column stays numeric and
        # downstream dropna() can actually remove them.
        observations = [
            df.loc[forecast_date, f'ret{position + 1}'] if forecast_date in df.index else missing
            for position, forecast_date in enumerate(pred['forecast_date'])
        ]
        pred = pred.set_index('forecast_date')
        pred['observation'] = observations

        # Score only rows that have an observation; the rest stay NaN instead
        # of being scored against a placeholder value.
        scores = []
        for _, row in pred.iterrows():
            observation = row['observation']
            if pd.isna(observation):
                scores.append(missing)
            else:
                scores.append(evaluate_horizon(row[quantile_cols], observation))
        pred['score'] = scores

        per_split.append(pred)

    # pd.concat raises on an empty list, so handle last_x == 0 explicitly.
    if not per_split:
        return pd.DataFrame()

    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(per_split).sort_index()

In [None]:
# Model registry for the evaluation below. The module is reloaded so that
# edits to arma_garch_11 are picked up without restarting the kernel.
from DAX.Models import arma_garch_11 as ag
import importlib
importlib.reload(ag)

# Each entry needs 'name' (used in progress output) and 'function' (the
# forecaster called by evaluate_model); evaluate_models later adds an
# 'evaluation' key to every dict.
models = [
    {
        'name': 'garch11_norm_1y',
        'function': ag.garch11_norm_1y
    },
    {
        'name': 'garch11_norm_2y',
        'function': ag.garch11_norm_2y
    },
    {
        'name': 'garch11_norm_5y',
        'function': ag.garch11_norm_5y
    },
]



In [None]:
# Run 50 evaluation iterations per model with weeks=True (passed on as a
# one-week step); results are stored in each model dict under 'evaluation'.
# NOTE(review): this passes the full `df`, not the `df_cval` slice built
# above — confirm which frame is intended.
evaluate_models(models, df, 50, weeks=True)

In [None]:
import numpy as np


# Pick one model (index 2 in `models`) and split its evaluation by horizon.
model_index = 2

# Rows containing NaN are dropped and the cleaned frame is stored back on the
# model, so this cell changes models[model_index]['evaluation'].
cleaned_evaluation = models[model_index]['evaluation'].dropna()
models[model_index]['evaluation'] = cleaned_evaluation

# One sub-frame per forecast horizon, in the order 1, 2, 5, 6, 7 days.
results_per_horizon = []
for h in ('1 day', '2 day', '5 day', '6 day', '7 day'):
    results_per_horizon.append(cleaned_evaluation[cleaned_evaluation['horizon'] == h])

# results36h = models[0]['evaluation'][models[0]['evaluation']['horizon'] == '36 hour']



# results = models[0]['evaluation']



## Check Calibration manually

## Create PIT Histogram

In [None]:
# import matplotlib.pyplot as plt
# import numpy as np
# import string
# import random
# import scipy.stats as stats
# 
# from scipy.stats import uniform
# 
# for index, r in enumerate(results_per_horizon):
#     print(r['observation'])
#     obs = np.array(np.array(r['observation']),dtype=float)
#     forecasts = np.array(np.array(r.loc[:,'q0.025':'q0.975']),dtype=float)
# 
#     # Example quantile forecasts and observations
#     quantile_forecasts = forecasts
#     observations = obs
#     
#     # Calculate PIT values
#     pit_values = np.zeros_like(quantile_forecasts)
#     
#     for i in range(quantile_forecasts.shape[1]):
#         sorted_forecasts = np.sort(quantile_forecasts[:, i])
#         ecdf_values = np.linspace(0, 1, len(sorted_forecasts))
#         pit_values[:, i] = np.interp(
#             quantile_forecasts[:, i], sorted_forecasts, ecdf_values)
#     
#     # Create PIT histogram
#     fig, ax = plt.subplots()
#     ax.hist(pit_values.flatten(), bins=20, density=True,
#             alpha=0.75, color='blue', edgecolor='black')
#     
#     # Plot the uniform distribution for reference
#     x = np.linspace(0, 1, 100)
#     ax.plot(x, uniform.pdf(x), 'r-', lw=2, label='Uniform [0,1]')
#     
#     ax.set_title('Probability Integral Transform (PIT)')
#     ax.set_xlabel('PIT Values')
#     ax.set_ylabel('Density')
#     ax.legend()
#     
#     
#     plt.savefig(f'../plots/{index} {"".join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(4))}.png')
#     plt.show()

## Check quantile coverage manually

In [None]:
# Empirical coverage check: for every model, the share of rows whose predicted
# quantile lies above the observation, per horizon and over all horizons.
# For a calibrated forecast that share should be close to the quantile level
# (e.g. about 0.25 for q0.25).
horizon_labels = [f'{h} day' for h in [1, 2, 5, 6, 7]]
quantile_labels = ['q0.025', 'q0.25', 'q0.5', 'q0.75', 'q0.975']

for model_index in range(len(models)):
    # Drop rows containing NaN and store the cleaned frame back on the model
    # (same state change as before, so later cells see the cleaned frame).
    models[model_index]['evaluation'] = models[model_index]['evaluation'].dropna()
    evaluation = models[model_index]['evaluation']

    # One sub-frame per horizon; column labels below come from the same list
    # so the two can no longer drift apart.
    results_per_horizon = [evaluation[evaluation['horizon'] == h] for h in horizon_labels]

    coverage_rows = []
    for q in quantile_labels:
        per_quantile = [round((r[q] > r['observation']).mean(), 3) for r in results_per_horizon]
        # Last column: coverage over all horizons together.
        per_quantile.append(round((evaluation[q] > evaluation['observation']).mean(), 3))
        coverage_rows.append(per_quantile)

    # Label each table; otherwise the outputs of different models are
    # indistinguishable.
    print(f'*** Quantile coverage of Model {models[model_index]["name"]} ***')
    print(pd.DataFrame(coverage_rows, columns=horizon_labels + ['All horizons'], index=quantile_labels))

