# Create Submission File

## Imports and get data

In [1]:
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import timedelta
import pandas as pd
import numpy as np
from HelpFunctions.calc_score import evaluate_horizon
from HelpFunctions.mix_models import mix_models
from HelpFunctions.date_and_time import most_recent_thursday, split_time

from DAX.HelpFunctions.get_dax_data import get_dax_data
from DAX.Models import baseline as dax_bl
from DAX.Models import garch11 as dax_garch

from Energy.HelpFunctions.get_energy_data import get_energy_data, prepare_data
from Energy.Models import baseline as en_bl
from Energy.Models import Model4_population as en_m4_pop
from datetime import datetime
import pytz

# import importlib
# importlib.reload(Energy.Models)

# import importlib
# importlib.reload(Energy.Models)



### DAX: Get and prepare Data
Use tuesday as last datapoint

In [2]:
# Insert Date of Wednesday (forecast-day) here. The current day has to be wednesday!!
start_date = datetime(2023, 11, 29, 8, 0, 0)

In [3]:
df_dax = get_dax_data()
# pretend_start_date = date(2023, 11, 22, 11, 00, 0, tzinfo=pytz.timezone('Europe/Berlin'))

df_dax = df_dax[df_dax.index.date <= start_date.date()]

In [4]:
df_dax

Unnamed: 0_level_0,Close,ret1,ret2,ret3,ret4,ret5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1987-12-30 00:00:00+01:00,1005.190002,,,,,
1988-01-04 00:00:00+01:00,956.489990,-4.966154,,,,
1988-01-05 00:00:00+01:00,996.099976,4.057731,-0.908423,,,
1988-01-06 00:00:00+01:00,1006.010010,0.989967,5.047698,0.081544,,
1988-01-07 00:00:00+01:00,1014.469971,0.837426,1.827393,5.885123,0.918970,
...,...,...,...,...,...,...
2023-11-23 00:00:00+01:00,15994.730469,0.231031,0.590686,0.585656,0.473590,1.309719
2023-11-24 00:00:00+01:00,16029.490234,0.217084,0.448115,0.807771,0.802741,0.690674
2023-11-27 00:00:00+01:00,15966.370117,-0.394552,-0.177468,0.053563,0.413218,0.408188
2023-11-28 00:00:00+01:00,15992.669922,0.164584,-0.229968,-0.012883,0.218148,0.577803


### Energy: Get and prepare Data

In [5]:
# from Energy.HelpFunctions.get_energy_data import fetch_energy_data
# fetch_energy_data()

In [6]:
df_en = get_energy_data(force_return=True)
df_en = prepare_data(df_en)

In [7]:
df_en = df_en.loc[df_en.index <= start_date]

In [8]:
df_en

Unnamed: 0_level_0,gesamt,weekday
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-24 00:00:00,42.02925,0
2018-12-24 01:00:00,39.61025,0
2018-12-24 02:00:00,39.13875,0
2018-12-24 03:00:00,39.42100,0
2018-12-24 04:00:00,40.74775,0
...,...,...
2023-11-29 04:00:00,52.31725,2
2023-11-29 05:00:00,55.00700,2
2023-11-29 06:00:00,62.19825,2
2023-11-29 07:00:00,67.06550,2


## Run models for the data

### DAX

In [9]:
from DAX.Models.baseline import baseline
# dax_bl.baseline(df_dax)
# forecast_dax = dax_garch.garch11(df_dax)
forecast_dax = mix_models([dax_garch.garch11, baseline], [1,1], df_dax, 'DAX')
forecast_dax

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2023-11-30,DAX,1 day,-2.455251,-0.547924,0.033557,0.632592,2.154504
1,2023-12-01,DAX,2 day,-3.130767,-0.782713,0.038871,0.870507,2.872066
2,2023-12-04,DAX,5 day,-4.00758,-0.930672,0.081452,1.086359,3.692348
3,2023-12-05,DAX,6 day,-5.055998,-1.180942,0.100559,1.380627,4.598382
4,2023-12-06,DAX,7 day,-5.596188,-1.312663,0.171306,1.56407,5.358285


### Energy

In [10]:
from Energy.Models.baseline import baseline
from HelpFunctions.mix_models import mix_models_per_horizon
from Energy.Models.Model4_sunhours import model4_sunhours
from Energy.Models.Model4_holidays_2 import model4_holidays_2
from Energy.Models.Model4_population import model4_population
from Energy.Models.Model5 import model5

# en_bl.baseline(df_en, LAST_IDX=-1, offset_horizons=11)

weights_m5_bl_m4_x = [[1,1,1,0,0,0.3],[0,0,0,0.25,0.25,0.7],[0,0,0,0.25,0.25,0],[0,0,0,0.25,0.25,0],[0,0,0,0.25,0.25,0]]
functions_m5_bl_m4_x = [model5, baseline, model4_population, model4_holidays_2, model4_sunhours]




# forecast_en = en_m4_pop.model4_population(df_en, offset_horizons=15)
forecast_en = mix_models_per_horizon(functions_m5_bl_m4_x, weights_m5_bl_m4_x, df_en, target="energy", offset_horizons=15)
forecast_en

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2023-12-01 12:00:00,energy,36 hour,50.467131,60.497766,66.051668,69.157071,75.901056
1,2023-12-01 16:00:00,energy,40 hour,50.025403,58.757404,63.646105,67.067508,73.881302
2,2023-12-01 20:00:00,energy,44 hour,47.65478,54.893569,58.800761,62.064985,68.97718
3,2023-12-02 12:00:00,energy,60 hour,43.415123,52.010165,56.19526,59.430764,64.503686
4,2023-12-02 16:00:00,energy,64 hour,42.424454,49.698458,53.552043,56.876443,62.514251
5,2023-12-02 20:00:00,energy,68 hour,41.852393,46.22711,49.266626,52.891458,59.335954


### Infections

In [11]:
forecast_infections = {
    'forecast_date': [start_date.strftime('%Y-%m-%d')] * 5,
    'target': ['infections'] * 5,
    'horizon': ['0 week', '1 week', '2 week', '3 week', '4 week'],
    # 'q0.025': ['NA'] * 5,
    # 'q0.25': ['NA'] * 5,
    # 'q0.5': ['NA'] * 5,
    # 'q0.75': ['NA'] * 5,
    # 'q0.975': ['NA'] * 5,
    'q0.025': [np.nan] * 5,
    'q0.25': [np.nan] * 5,
    'q0.5': [np.nan] * 5,
    'q0.75': [np.nan] * 5,
    'q0.975': [np.nan] * 5
}

forecast_infections = pd.DataFrame(forecast_infections)

In [12]:
forecast_infections

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2023-11-29,infections,0 week,,,,,
1,2023-11-29,infections,1 week,,,,,
2,2023-11-29,infections,2 week,,,,,
3,2023-11-29,infections,3 week,,,,,
4,2023-11-29,infections,4 week,,,,,


## Merge results

Check results: Are they valid, forecast_date is right? The last entry might be cut off

In [13]:
forecasts = pd.concat([forecast_dax,forecast_en, forecast_infections]).reset_index().drop(columns=['index'])
forecasts

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2023-11-30,DAX,1 day,-2.455251,-0.547924,0.033557,0.632592,2.154504
1,2023-12-01,DAX,2 day,-3.130767,-0.782713,0.038871,0.870507,2.872066
2,2023-12-04,DAX,5 day,-4.00758,-0.930672,0.081452,1.086359,3.692348
3,2023-12-05,DAX,6 day,-5.055998,-1.180942,0.100559,1.380627,4.598382
4,2023-12-06,DAX,7 day,-5.596188,-1.312663,0.171306,1.56407,5.358285
5,2023-12-01 12:00:00,energy,36 hour,50.467131,60.497766,66.051668,69.157071,75.901056
6,2023-12-01 16:00:00,energy,40 hour,50.025403,58.757404,63.646105,67.067508,73.881302
7,2023-12-01 20:00:00,energy,44 hour,47.65478,54.893569,58.800761,62.064985,68.97718
8,2023-12-02 12:00:00,energy,60 hour,43.415123,52.010165,56.19526,59.430764,64.503686
9,2023-12-02 16:00:00,energy,64 hour,42.424454,49.698458,53.552043,56.876443,62.514251


If everything seems good: Override forecast_date

In [14]:
forecasts['forecast_date'] = [start_date.strftime('%Y-%m-%d') for _ in range(len(forecasts))]

In [15]:
forecasts

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2023-11-29,DAX,1 day,-2.455251,-0.547924,0.033557,0.632592,2.154504
1,2023-11-29,DAX,2 day,-3.130767,-0.782713,0.038871,0.870507,2.872066
2,2023-11-29,DAX,5 day,-4.00758,-0.930672,0.081452,1.086359,3.692348
3,2023-11-29,DAX,6 day,-5.055998,-1.180942,0.100559,1.380627,4.598382
4,2023-11-29,DAX,7 day,-5.596188,-1.312663,0.171306,1.56407,5.358285
5,2023-11-29,energy,36 hour,50.467131,60.497766,66.051668,69.157071,75.901056
6,2023-11-29,energy,40 hour,50.025403,58.757404,63.646105,67.067508,73.881302
7,2023-11-29,energy,44 hour,47.65478,54.893569,58.800761,62.064985,68.97718
8,2023-11-29,energy,60 hour,43.415123,52.010165,56.19526,59.430764,64.503686
9,2023-11-29,energy,64 hour,42.424454,49.698458,53.552043,56.876443,62.514251


### Check df & Save csv-File

In [16]:
from HelpFunctions.check_submission import check_submission_df

check_submission_df(forecasts)
forecasts.to_csv(f'./Abgaben/{start_date.strftime("%Y%m%d")}_JakePeralta.csv', na_rep='NA', index=False)

Start checking...
---------------------------
Checking the Columns...
Checking type of columns...





Checking if the Dates make sense...
Checking targets...
Checking quantiles...
Assume that -- infections -- is your NaN-target. Please DOUBLECHECK if this is correct.
---------------------------
Looks good!


  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
