In [None]:
from HelpFunctions.date_and_time import most_recent_thursday, split_time
from DAX.HelpFunctions.get_dax_data import get_dax_data
from datetime import timedelta
from HelpFunctions.calc_score import evaluate_horizon
import pandas as pd
from DAX.Models.baseline import baseline
from HelpFunctions.mix_models import mix_models
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
# import importlib
# importlib.reload(Energy.Models)

In [None]:
# Load the DAX data frame used by the cells below. evaluate_model later checks
# forecast dates against this frame's index and reads its `ret<h>` columns.
df = get_dax_data()

In [None]:
# Exclusive cut-off: one day before the date returned by most_recent_thursday(df).
# Only rows whose index is strictly below the cut-off are kept, so -- assuming the
# helper returns a Thursday at midnight (TODO confirm) -- the last retained day is
# the Tuesday of that week.
start_date_excl = most_recent_thursday(df) - timedelta(days=1)
df_cval = df.loc[df.index < start_date_excl]
# Replace the index by its `.date` values.
df_cval.index = df_cval.index.date

In [None]:
# Sanity check: show the last rows that survived the cut-off above.
df_cval.tail()

In [None]:
def evaluate_models(models, df, last_x, years=False, months=False, weeks=False):
    """Evaluate each model in ``models`` and attach the result to the model dict.

    Parameters
    ----------
    models : list of dict
        Every entry needs a ``'name'`` key (used for the progress message) and
        whatever ``evaluate_model`` requires. Each dict receives a new
        ``'evaluation'`` key holding the return value of ``evaluate_model``.
    df : object
        Passed through unchanged to ``evaluate_model``.
    last_x : int
        Passed through unchanged to ``evaluate_model``.
    years, months, weeks : bool
        Step-size selectors; exactly one of them has to be truthy. They are
        converted to ``int`` (0 or 1) before being handed on.

    Raises
    ------
    ValueError
        If the three selectors do not sum to exactly one.
    """
    step_flags = [years, months, weeks]
    if sum(step_flags) != 1:
        raise ValueError("Exactly one of the boolean parameters (years, months, weeks) must be True.")

    # Booleans become the step counts 0/1 expected further down.
    n_years, n_months, n_weeks = (int(flag) for flag in step_flags)

    for model in models:
        print(f'*********** Start the evaluation of Model {model["name"]} ***********')
        model['evaluation'] = evaluate_model(model, df, last_x, n_years, n_months, n_weeks)
        
def evaluate_model(model, df, last_x, years, months, weeks):
    """Back-test one model over ``last_x`` successive time splits.

    Every iteration calls ``split_time`` on the history left over from the
    previous iteration, lets the model forecast from the remaining history,
    looks up the realised values in ``df`` and scores each forecast row with
    ``evaluate_horizon``.

    Parameters
    ----------
    model : dict
        Needs a ``'function'`` entry. It is either a callable that takes the
        history frame and returns the forecast frame, or a two-element
        sequence whose items are forwarded to ``mix_models`` together with the
        history.
    df : pandas.DataFrame
        Full data set. Forecast dates are matched against its index and its
        ``ret<h>`` columns supply the observations, where ``h`` is the 1-based
        position of the forecast row.
    last_x : int
        Number of iterations.
    years, months, weeks : int
        Forwarded to ``split_time`` as ``num_years`` / ``num_months`` /
        ``num_weeks``.

    Returns
    -------
    pandas.DataFrame
        All forecast frames stacked and sorted by forecast date, extended by
        the columns ``observation`` and ``score``. Rows whose forecast date is
        not in ``df.index`` keep ``''`` as observation. An empty frame is
        returned when ``last_x`` is 0.
    """
    quantile_cols = ['q0.025', 'q0.25', 'q0.5', 'q0.75', 'q0.975']
    history = df
    fold_frames = []

    for w in range(last_x):
        print(f'Iteration {w+1} of {last_x}')
        # Only the "before" part is needed; it is also the input of the next split.
        history, _ = split_time(history, num_years=years, num_months=months, num_weeks=weeks)

        # A non-callable 'function' entry denotes a mixed model.
        if callable(model['function']):
            pred = model['function'](history)
        else:
            pred = mix_models(model['function'][0], model['function'][1], history)

        # Only dates present in df can be looked up (e.g. 05-01 is missing).
        # The horizon number is taken before filtering so it stays tied to the
        # row position in the forecast.
        dated_horizons = [
            (position + 1, d)
            for position, d in enumerate(pred['forecast_date'])
            if d in df.index
        ]

        pred = pred.set_index('forecast_date')
        # Keep the '' placeholders of the original implementation, but force
        # object dtype so numeric values can be written into the columns even
        # when pandas infers a dedicated string dtype for ''.
        pred = pred.assign(observation='', score='').astype(
            {'observation': object, 'score': object}
        )

        for h, d in dated_horizons:
            pred.loc[d, 'observation'] = df.loc[d, f'ret{h}']

        for index, row in pred.iterrows():
            pred.at[index, 'score'] = evaluate_horizon(row[quantile_cols], row['observation'])

        fold_frames.append(pred)

    if not fold_frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(fold_frames).sort_index()

In [None]:
from DAX.Models.garch11_t import garch11_t
from DAX.Models.garch11 import garch11
from DAX.Models.baseline_100 import baseline_100
from DAX.Models.baseline_300 import baseline_300

# Models to evaluate. Each entry is a dict with
#   'name'     - label used in progress messages and as column name in the score table
#   'function' - either a callable (called with the history frame), or a
#                two-element list that evaluate_model forwards to mix_models
#                as its first and second argument.
# evaluate_models adds an 'evaluation' key to every dict.
models = [
    {
        'name': 'baseline',
        'function': baseline
     },
    {
        'name': 'garch11',
        'function': garch11
     },
    # {
    #     'name': 'garch11_t',
    #     'function': garch11_t
    #  },
    {
        'name': 'baseline_garch11',
        # Mixed model: [list of model functions, list of numbers] -- presumably
        # the numbers are per-model weights; verify against mix_models.
        'function': [[baseline, garch11],[0.5,0.5]]
     },
    # {
    #     'name': 'baseline_300',
    #     'function': baseline_300
    #  },
    # {
    #     'name': 'baseline_100',
    #     'function': baseline_100
    #  },
]


In [None]:
# Run 100 evaluation iterations per model; weeks=True makes evaluate_model call
# split_time with num_weeks=1 in every iteration.
# NOTE(review): the full `df` is passed here, while the truncated `df_cval` built
# in an earlier cell is not used anywhere in the visible notebook -- confirm which
# frame is intended.
evaluate_models(models, df, 100, weeks=True)

In [None]:
# Persist the model dicts, including their 'evaluation' frames, so the
# evaluation does not have to be re-run.
# Note: pickle stores functions by reference, so the model modules must be
# importable when this file is loaded again.
evaluation_path = './Model evaluations/BL_GARCH11_MM-BL-GARCH11.pkl'
with open(evaluation_path, 'wb') as pickle_file:
    pickle.dump(models, pickle_file)

# Restoring a previous run (only unpickle files you created yourself):
# with open('./Model evaluations/baseline.pkl', 'rb') as f:
#     models = pickle.load(f)

In [None]:
# Inspect the evaluation frame that evaluate_models stored on the first model.
models[0]['evaluation']

In [None]:
# Horizon labels as they appear in the 'horizon' column of the evaluations.
horizons = [f"{days} day" for days in (1, 2, 5, 6, 7)]

# Collect, per model, its horizon and score columns and put them side by side.
# The resulting columns are named 'horizon_<name>' and '<name>'.
scores = []
names = []
for model_entry in models:
    scores.append(model_entry['evaluation'].loc[:, ['horizon', 'score']])
    names.extend([f'horizon_{model_entry["name"]}', model_entry["name"]])

score_df = pd.concat(scores, axis=1)
score_df.columns = names

In [None]:
# Show the combined table: one 'horizon_<name>' and one '<name>' (score) column per model.
score_df

In [None]:
# One density figure per forecast horizon, comparing the score distributions.
for horizon in [f"{days} day" for days in (1, 2, 5, 6, 7)]:
    # Rows are selected via the garch11 horizon column; this assumes the horizon
    # columns of all models agree row by row -- TODO confirm.
    is_horizon = score_df['horizon_garch11'] == horizon
    filtered_df = score_df[is_horizon]

    fig, ax = plt.subplots()
    # sns.kdeplot(data=filtered_df['baseline'], fill=True, label='Baseline', ax=ax)
    sns.kdeplot(data=filtered_df['garch11'], fill=True, label='Garch(1,1)', ax=ax)
    sns.kdeplot(data=filtered_df['baseline_garch11'], fill=True, label='Mixed', ax=ax)
    # sns.kdeplot(data=filtered_df['baseline_300'], fill=True, label='Baseline 300', ax=ax)

    ax.set_xlabel('Score')
    ax.set_ylabel('Density')
    ax.set_title(f'Density Plot of Scores: {horizon}')
    ax.legend()

    # ax.set_xlim(0, 8)
    plt.show()

In [None]:
# One line chart per forecast horizon showing how the scores develop over time.
for horizon in [f"{days} day" for days in (1, 2, 5, 6, 7)]:
    # Rows are selected via the garch11 horizon column; this assumes the horizon
    # columns of all models agree row by row -- TODO confirm.
    filtered_df = score_df[score_df['horizon_garch11'] == horizon]

    fig, ax = plt.subplots()
    # ax.plot(filtered_df.index, filtered_df['baseline'], label='baseline')
    ax.plot(filtered_df.index, filtered_df['garch11'], label='garch11')
    ax.plot(filtered_df.index, filtered_df['baseline_garch11'], label='mixed')
    # ax.plot(filtered_df.index, filtered_df['garch11_t'], label='garch11_t')

    ax.set_xlabel('time')
    ax.set_ylabel('score')
    # The horizon is part of the title; otherwise the figures produced by this
    # loop cannot be told apart.
    ax.set_title(f'Comparison of scores over time: {horizon}')
    ax.legend()

    plt.show()