In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from functions import get_DAX
import tensorflow as tf

In [3]:
df = get_DAX.get()

In [59]:
from keras.models import Model
from keras.layers import LSTM, Dense, Input, RepeatVector, TimeDistributed
from keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from functions import reorder_quantiles

def dax_LSTM(input_data, date_str='2022-03-18'):
    df = pd.DataFrame(input_data)
    
    # Ensure the DataFrame index is of datetime type and normalize to remove the time part (if needed)
    df.index = pd.to_datetime(df.index).normalize()
    
    # Convert date_str to datetime, taking into account the timezone
    target_date = pd.to_datetime(date_str).tz_localize('Europe/Berlin').normalize()  # Adjust the timezone as per your data
    
    # Validate if target_date is in the DataFrame index
    if target_date not in df.index:
        raise ValueError(f"date_str {date_str} not found in dataset index")
    
    # Assuming these columns are what we're using to predict
    feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits', 'lag_ret1', 'lag_ret2', 'lag_ret3', 'lag_ret4', 'lag_ret5']
    features = df.loc[:target_date, feature_columns].values
    scaler = StandardScaler()
    features_scaled = scaler.fit_transform(features)
    features_scaled = np.reshape(features_scaled, (features_scaled.shape[0], 1, features_scaled.shape[1]))
    
    # Locate the index for the prediction start date
    prediction_start_index = df.index.get_loc(target_date)
    
    # Select the scaled features for the specified date_str for prediction
    last_features = features_scaled[prediction_start_index].reshape(1, 1, -1)
    # Targets for quantile regression would ideally be structured for direct prediction,
    # but here we'll simulate as if the model is predicting 5 specific values for simplicity.
    # Adjust this as necessary for real quantile prediction.
    target_columns = ['future_ret1', 'future_ret2', 'future_ret3', 'future_ret4', 'future_ret5']
    targets = df.loc[:target_date, target_columns]
    scaler_y = StandardScaler()
    targets_scaled = scaler_y.fit_transform(targets)
    
    # Model
    input_seq = Input(shape=(features_scaled.shape[1], features_scaled.shape[2]))
    encoder_out, state_h, state_c = LSTM(100, return_state=True)(input_seq)
    encoder_states = [state_h, state_c]

    decoder_lstm = LSTM(100, return_sequences=True)
    decoder_out = decoder_lstm(RepeatVector(1)(encoder_out), initial_state=encoder_states)
    decoder_dense = TimeDistributed(Dense(25))  # Adjusting for 5 quantiles * 5 future returns
    decoder_outputs = decoder_dense(decoder_out)
    model = Model(inputs=input_seq, outputs=decoder_outputs)
    model.compile(optimizer='adam', loss='mse')
    
    # Training
    history = model.fit(features_scaled, np.repeat(targets_scaled, 5, axis=1), epochs=10, batch_size=72, verbose=2, shuffle=False)
    
    # Predictions for the last available day
    predictions_scaled = model.predict(last_features)
    
    # Since predictions_scaled is (1, 25), and we need it to match the original targets' shape for inverse_transform,
    # Let's first reshape predictions to mimic 5 future returns for a single sample, ignoring the quantile dimension.
    # This is a workaround and simplifies the interpretation of the predictions.
    # A more accurate approach would involve handling each quantile's predictions separately.
    
    # Assuming the predictions_scaled array is shaped (1, 25), corresponding to 5 days and 5 quantiles each
    predictions = predictions_scaled.reshape(5, 5)  # Reshape to (5 days, 5 quantiles)
    
    # Prepare the output DataFrame directly from predictions
    forecast_date = pd.to_datetime(date_str).strftime('%Y-%m-%d')  # This is the reference forecast date
    
    # Horizons are based on forecast_date + 1 day, +2 days, etc.
    horizons = [(pd.to_datetime(forecast_date) + pd.Timedelta(days=i)).strftime('%Y-%m-%d') for i in range(1, 6)]
    
    predictions_df = pd.DataFrame(predictions, columns=['q0.025', 'q0.25', 'q0.5', 'q0.75', 'q0.975'])
    predictions_df.insert(0, 'horizon', ['1 day', '2 day', '3 day', '4 day', '5 day']) # Now horizon represents the actual forecast dates
    predictions_df.insert(0, 'target', 'DAX')
    
    # Since forecast_date should be locked, it means all forecasts are made on this date
    forecast_date_column = [forecast_date] * 5  # Repeat the forecast_date for all rows
    predictions_df.insert(0, 'forecast_date', forecast_date_column)
    predictions_df= reorder_quantiles.reorder_quantiles(predictions_df)
    
    return predictions_df


In [60]:
predictions = dax_LSTM(df)

2024-03-24 20:34:25.981467: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-03-24 20:34:25.982579: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-03-24 20:34:25.983297: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 1/10


2024-03-24 20:34:26.634002: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-03-24 20:34:26.635033: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-03-24 20:34:26.635806: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

120/120 - 2s - loss: 0.9989 - 2s/epoch - 15ms/step
Epoch 2/10
120/120 - 0s - loss: 0.9973 - 354ms/epoch - 3ms/step
Epoch 3/10
120/120 - 0s - loss: 0.9970 - 353ms/epoch - 3ms/step
Epoch 4/10
120/120 - 0s - loss: 0.9968 - 358ms/epoch - 3ms/step
Epoch 5/10
120/120 - 0s - loss: 0.9967 - 362ms/epoch - 3ms/step
Epoch 6/10
120/120 - 0s - loss: 0.9966 - 353ms/epoch - 3ms/step
Epoch 7/10
120/120 - 0s - loss: 0.9965 - 356ms/epoch - 3ms/step
Epoch 8/10
120/120 - 0s - loss: 0.9964 - 354ms/epoch - 3ms/step
Epoch 9/10
120/120 - 0s - loss: 0.9964 - 356ms/epoch - 3ms/step
Epoch 10/10
120/120 - 0s - loss: 0.9963 - 349ms/epoch - 3ms/step


2024-03-24 20:34:31.878610: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-03-24 20:34:31.879744: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-03-24 20:34:31.880432: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



In [61]:
predictions

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2022-03-18,DAX,1 day,-0.017575,-0.018717,-0.014414,-0.020012,-0.020352
1,2022-03-18,DAX,2 day,-0.020772,-0.032834,-0.020374,-0.024957,-0.0276
2,2022-03-18,DAX,3 day,-0.028952,-0.019869,-0.028244,-0.033481,-0.025934
3,2022-03-18,DAX,4 day,-0.031593,-0.043827,-0.044109,-0.032364,-0.035192
4,2022-03-18,DAX,5 day,-0.041386,-0.027359,-0.045852,-0.040013,-0.032437


In [47]:
df['2023-03-18':'2024-03-22']

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,future_ret1,future_ret2,future_ret3,future_ret4,future_ret5,lag_ret1,lag_ret2,lag_ret3,lag_ret4,lag_ret5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-03-20 00:00:00+01:00,14715.400391,14980.429688,14458.389648,14933.379883,125262600,0.0,0.0,-1.738982,-1.876105,-1.837976,-0.159586,-1.292719,1.112273,-0.225546,1.335572,-1.985400,-0.174556
2023-03-21 00:00:00+01:00,15062.059570,15253.099609,15055.929688,15195.339844,94322300,0.0,0.0,-0.137123,-0.098993,1.579397,0.446263,0.351516,1.738982,2.851256,1.513436,3.074555,-0.246418
2023-03-22 00:00:00+01:00,15174.669922,15298.490234,15166.950195,15216.190430,82685900,0.0,0.0,0.038130,1.716519,0.583386,0.488639,-0.737210,0.137123,1.876105,2.988379,1.650559,3.211678
2023-03-23 00:00:00+01:00,15178.250000,15244.320312,15078.719727,15210.389648,82739800,0.0,0.0,1.678390,0.545257,0.450509,-0.775340,-2.030544,-0.038130,0.098993,1.837976,2.950249,1.612429
2023-03-24 00:00:00+01:00,15135.500000,15135.650391,14809.820312,14957.230469,181078000,0.0,0.0,-1.133133,-1.227880,-2.453730,-3.708934,-4.392309,-1.678390,-1.716519,-1.579397,0.159586,1.271859
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-11 00:00:00+01:00,17690.949219,17750.720703,17662.550781,17746.269531,66646400,0.0,0.0,-1.225618,-1.204862,-1.097118,-1.067080,-1.044940,-0.383795,-0.542752,0.166701,0.270106,0.169755
2024-03-12 00:00:00+01:00,17810.150391,17973.220703,17746.890625,17965.109375,91388300,0.0,0.0,0.020756,0.128500,0.158538,0.180678,-0.124502,1.225618,0.841823,0.682866,1.392319,1.495725
2024-03-13 00:00:00+01:00,17984.500000,18001.419922,17939.560547,17961.380859,92880500,0.0,0.0,0.107743,0.137782,0.159922,-0.145258,-0.298806,-0.020756,1.204862,0.821066,0.662110,1.371563
2024-03-14 00:00:00+01:00,18002.970703,18039.050781,17899.929688,17942.039062,93590600,0.0,0.0,0.030038,0.052178,-0.253002,-0.406550,-1.313432,-0.107743,-0.128500,1.097118,0.713323,0.554366
