# Create Submission File

## Imports and get data

In [1]:
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import timedelta
import pandas as pd
import numpy as np
from HelpFunctions.calc_score import evaluate_horizon
from HelpFunctions.mix_models import mix_models
from HelpFunctions.date_and_time import most_recent_thursday, split_time

from DAX.HelpFunctions.get_dax_data import get_dax_data
from DAX.Models import baseline as dax_bl
from DAX.Models import garch11 as dax_garch

from Energy.HelpFunctions.get_energy_data import get_energy_data, prepare_data
from Energy.Models import baseline as en_bl
from Energy.Models import Model4_population as en_m4_pop
from datetime import datetime
import pytz

# import importlib
# importlib.reload(Energy.Models)

# import importlib
# importlib.reload(Energy.Models)



### DAX: Get and prepare Data
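Use Tuesday as the last data point. (Reviewer note: the filter in the cell below keeps every row dated on or before `start_date`, i.e. Wednesday's rows are kept as well — please confirm this is intended.)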

In [2]:
# Forecast day for this submission -- edit this value each week.
# It has to be a Wednesday; the time component is part of the cut-off that is
# later compared against the energy data index.
start_date = datetime(2023, 11, 22, 12, 0, 0)

# Fail fast on a mistyped date instead of silently producing a submission for
# the wrong day. datetime.weekday() numbers Monday as 0, so Wednesday is 2.
if start_date.weekday() != 2:
    raise ValueError(
        f'start_date must be a Wednesday, got {start_date:%A %Y-%m-%d}'
    )

In [3]:
# Load the DAX history, then keep only the rows whose calendar date is on or
# before the forecast day (the time of day of start_date is ignored here).
dax_history = get_dax_data()
on_or_before_forecast_day = dax_history.index.date <= start_date.date()
df_dax = dax_history.loc[on_or_before_forecast_day]

### Energy: Get and prepare Data

In [4]:
# from Energy.HelpFunctions.get_energy_data import fetch_energy_data
# fetch_energy_data()

In [5]:
# Load the energy data and pass it straight through the project's preparation step.
# NOTE(review): the exact effect of force_return=True is defined in get_energy_data
# and is not visible from this notebook.
df_en = prepare_data(get_energy_data(force_return=True))



In [6]:
# Discard every observation stamped later than the forecast cut-off (date and time).
up_to_cutoff = df_en.index <= start_date
df_en = df_en[up_to_cutoff]

## Run models for the data

### DAX

In [7]:
# Produce the DAX forecast from the filtered history.
# The project's garch11 model is used (presumably a GARCH(1,1) fit, judging by
# the name -- confirm in DAX.Models.garch11).
# Alternative kept for reference: dax_bl.baseline(df_dax)
forecast_dax = dax_garch.garch11(df_dax)

### Energy

In [8]:
# Produce the energy forecast from the filtered data with the model4_population model.
# NOTE(review): offset_horizons=11 is a magic number whose meaning is defined inside
# the model module and is not visible here -- consider naming it in a config cell.
# Alternative kept for reference: en_bl.baseline(df_en, LAST_IDX=-1, offset_horizons=11)
forecast_en = en_m4_pop.model4_population(df_en, offset_horizons=11)



### Infections

In [17]:
# Placeholder rows for the infections target: one row per weekly horizon, with
# every quantile left as a real missing value (np.nan) rather than the string 'NA'.
infection_horizons = [f'{week} week' for week in range(5)]
quantile_columns = ['q0.025', 'q0.25', 'q0.5', 'q0.75', 'q0.975']
n_rows = len(infection_horizons)

infection_columns = {
    'forecast_date': [start_date.strftime('%Y-%m-%d')] * n_rows,
    'target': ['infections'] * n_rows,
    'horizon': infection_horizons,
}
# Append the quantile columns in submission order, all filled with NaN.
infection_columns.update({name: [np.nan] * n_rows for name in quantile_columns})

forecast_infections = pd.DataFrame(infection_columns)

In [18]:
# Display the placeholder frame to verify its columns before merging.
forecast_infections

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2023-11-22,infections,0 week,,,,,
1,2023-11-22,infections,1 week,,,,,
2,2023-11-22,infections,2 week,,,,,
3,2023-11-22,infections,3 week,,,,,
4,2023-11-22,infections,4 week,,,,,


## Merge results

Check the results: are they valid, and is `forecast_date` correct? The last entry might be cut off.

In [19]:
# Stack the per-target forecasts into a single frame with a fresh 0..n-1 index.
# ignore_index=True replaces the former reset_index().drop(columns=['index'])
# round trip, which would drop a real data column if any input frame ever
# carried a column named 'index'.
forecasts = pd.concat(
    [forecast_dax, forecast_en, forecast_infections],
    ignore_index=True,
)
forecasts

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2023-11-23,DAX,1 day,-1.704474,-0.586567,0.0,0.586567,1.704474
1,2023-11-24,DAX,2 day,-2.5384,-0.873549,0.0,0.873549,2.5384
2,2023-11-27,DAX,5 day,-3.201805,-1.101849,0.0,1.101849,3.201805
3,2023-11-28,DAX,6 day,-4.264001,-1.467387,0.0,1.467387,4.264001
4,2023-11-29,DAX,7 day,-5.25163,-1.807263,0.0,1.807263,5.25163
5,2023-11-24 12:00:00,energy,36 hour,49.780452,59.711653,65.078282,68.350023,74.826615
6,2023-11-24 16:00:00,energy,40 hour,49.772025,57.927653,62.675497,66.118272,73.109561
7,2023-11-24 20:00:00,energy,44 hour,47.186092,54.170055,57.832552,61.0945,68.285615
8,2023-11-25 12:00:00,energy,60 hour,41.1298,50.936517,55.292036,58.089,63.756366
9,2023-11-25 16:00:00,energy,64 hour,41.121373,49.152517,52.889251,55.857249,62.039311


If everything looks good, overwrite `forecast_date` in every row with the submission date (`start_date`).

In [20]:
# Stamp every row with the submission date; pandas broadcasts the scalar to all rows.
forecasts['forecast_date'] = start_date.strftime('%Y-%m-%d')

In [21]:
# Display the merged frame again to confirm forecast_date was overwritten in every row.
forecasts

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2023-11-22,DAX,1 day,-1.704474,-0.586567,0.0,0.586567,1.704474
1,2023-11-22,DAX,2 day,-2.5384,-0.873549,0.0,0.873549,2.5384
2,2023-11-22,DAX,5 day,-3.201805,-1.101849,0.0,1.101849,3.201805
3,2023-11-22,DAX,6 day,-4.264001,-1.467387,0.0,1.467387,4.264001
4,2023-11-22,DAX,7 day,-5.25163,-1.807263,0.0,1.807263,5.25163
5,2023-11-22,energy,36 hour,49.780452,59.711653,65.078282,68.350023,74.826615
6,2023-11-22,energy,40 hour,49.772025,57.927653,62.675497,66.118272,73.109561
7,2023-11-22,energy,44 hour,47.186092,54.170055,57.832552,61.0945,68.285615
8,2023-11-22,energy,60 hour,41.1298,50.936517,55.292036,58.089,63.756366
9,2023-11-22,energy,64 hour,41.121373,49.152517,52.889251,55.857249,62.039311


### Check df & Save csv-File

In [23]:
from pathlib import Path

from HelpFunctions.check_submission import check_submission_df

# Run the project's submission checks on the merged frame before writing anything.
# NOTE(review): whether check_submission_df raises or only prints on a failed
# check is not visible here -- read its output before using the CSV.
check_submission_df(forecasts)

# Create the output folder on demand: DataFrame.to_csv does not create missing
# directories and would otherwise fail on a fresh checkout.
submission_dir = Path('./Abgaben')
submission_dir.mkdir(parents=True, exist_ok=True)

# File name pattern: <YYYYMMDD of the forecast day>_JakePeralta.csv
submission_path = submission_dir / f'{start_date.strftime("%Y%m%d")}_JakePeralta.csv'
forecasts.to_csv(submission_path, index=False)

Start checking...
---------------------------
Checking the Columns...
Checking type of columns...





Checking if the Dates make sense...
Checking targets...
Checking quantiles...
Assume that -- infections -- is your NaN-target. Please DOUBLECHECK if this is correct.
---------------------------
Looks good!


  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
  diffs[0] = 0
