# Model: LSTM

In [1]:
import pandas as pd
import numpy as np
import tensorflow.keras
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Turn off pandas' SettingWithCopyWarning for the whole session: the forecasting
# function below assigns columns on a frame obtained by filtering another frame.
pd.options.mode.chained_assignment = None 

2022-02-22 10:13:16.846733: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-02-22 10:13:16.846765: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Read the OHLCV history (columns: Date, Symbol, Open, High, Low, Close, Volume)
csv_path = "../data/ohlcv_m6.csv"
df = pd.read_csv(csv_path)
print(df)

              Date Symbol       Open       High        Low      Close  \
0       2013-01-02   ABBV  23.848844  24.176665  23.288819  23.985435   
1       2013-01-03   ABBV  23.903484  23.903484  23.329800  23.787382   
2       2013-01-04   ABBV  23.643949  23.828348  23.391256  23.486870   
3       2013-01-07   ABBV  23.322972  24.210815  23.322972  23.534687   
4       2013-01-08   ABBV  23.418585  23.657618  22.783435  23.022469   
...            ...    ...        ...        ...        ...        ...   
542265  2022-02-11    VXX  20.469999  23.870001  20.295000  23.240000   
542266  2022-02-14    VXX  23.309999  24.799999  23.010000  23.309999   
542267  2022-02-15    VXX  21.680000  22.219999  21.330000  21.389999   
542268  2022-02-16    VXX  21.700001  22.139999  20.459999  20.530001   
542269  2022-02-17    VXX  21.709999  23.090000  21.620001  22.959999   

           Volume  
0        13767900  
1        16739300  
2        21372100  
3        17897100  
4        17863300  
...

In [3]:
def _build_lstm_model(look_back):
    """Create and compile the network used for every symbol.

    The model is one LSTM layer (10 units, ReLU) followed by a single-unit
    Dense output, compiled with the Adam optimizer and mean-squared-error loss.

    Parameters
    ----------
    look_back : int
        Number of past closes in each input window; fixes the input shape to
        ``(look_back, 1)``.
    """
    model = Sequential()
    model.add(
        LSTM(10,
             activation='relu',
             input_shape=(look_back, 1))
    )
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model


def _forecast_symbol(symbol, closes, split_percent, look_back, num_epochs, num_prediction):
    """Fit a fresh model on one symbol's closes and roll it forward.

    Parameters
    ----------
    symbol : str
        Ticker; used in error messages and in the name of the model diagram.
    closes : array-like
        Close prices of the symbol, in the row order of the input frame.
    split_percent, look_back, num_epochs, num_prediction
        See ``make_predictions``.

    Returns
    -------
    numpy.ndarray
        ``num_prediction + 1`` floats: the last observed close followed by the
        ``num_prediction`` recursive one-step-ahead predictions.

    Raises
    ------
    ValueError
        If the training slice has no more than ``look_back`` rows, i.e. not a
        single (window, target) pair can be built from it.
    """
    closes = np.asarray(closes).reshape(-1)

    # Only the leading part of the series is used for fitting; the tail is held out.
    split = int(split_percent * len(closes))
    close_train = closes[:split].reshape(-1, 1)

    # Validate before touching TensorFlow so the failure is cheap and names the symbol.
    if len(close_train) <= look_back:
        raise ValueError(
            "Not enough data for symbol %r: %d training rows, need more than look_back=%d"
            % (symbol, len(close_train), look_back)
        )

    # Drop the graph/state left over from the previous symbol's model.
    tf.keras.backend.clear_session()

    # NOTE(review): prices are fed to the network unscaled; the training log in this
    # notebook shows a large, non-monotonic loss - consider normalising the inputs.
    train_generator = TimeseriesGenerator(close_train, close_train, length=look_back, batch_size=20)

    model = _build_lstm_model(look_back)

    # Side effect: writes a diagram of the architecture next to the notebook.
    dot_img_file = symbol + "_model.png"
    tf.keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True)

    model.fit(train_generator, epochs=num_epochs, verbose=2)

    # Recursive forecast: seed with the last `look_back` observed closes (from the
    # full series, including the held-out tail) and feed each prediction back in.
    history = np.empty(look_back + num_prediction, dtype="float64")
    history[:look_back] = closes[-look_back:]
    for step in range(look_back, look_back + num_prediction):
        window = history[step - look_back:step].reshape((1, look_back, 1))
        history[step] = model.predict(window)[0][0]

    # Keep the last observed close as the first element so the forecast lines up
    # with the forecast dates, which start on the last date of the input.
    return history[look_back - 1:]


def make_predictions(df, split_percent=0.99, look_back=20, num_epochs=100, num_prediction=30):
    """Forecast future closes for every symbol in ``df`` with one LSTM per symbol.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Date``, ``Symbol`` and ``Close``. The rows of
        each symbol are used in the order they appear, and the ``Date`` of the
        very last row anchors the forecast dates (assumes rows are in
        chronological order - TODO confirm against the data source).
    split_percent : float, default 0.99
        Fraction of each symbol's series, from the start, used for training.
    look_back : int, default 20
        Length of the input window of the LSTM.
    num_epochs : int, default 100
        Number of training epochs per symbol.
    num_prediction : int, default 30
        Number of steps to forecast past the last observation.

    Returns
    -------
    pandas.DataFrame
        A ``Date`` column with ``num_prediction + 1`` consecutive dates starting at
        the last date of ``df``, plus one column per symbol whose first value is
        the last observed close and whose remaining values are the predictions.
        An empty ``df`` yields an empty frame with only the ``Date`` column.

    Raises
    ------
    ValueError
        If a symbol has too few rows to build a single training window.
    """
    # Nothing to forecast: return the same layout with zero rows instead of crashing.
    if df.empty:
        return pd.DataFrame({"Date": pd.to_datetime([])})

    # Forecast dates, shared by all symbols.
    # NOTE(review): date_range produces consecutive calendar days here, while each
    # model step corresponds to one input row - confirm this labelling is intended.
    last_date = df['Date'].values[-1]
    forecast_dates = pd.date_range(last_date, periods=num_prediction + 1).tolist()

    # Collect all columns first and build the frame once at the end.
    columns = {"Date": forecast_dates}

    for symbol in df["Symbol"].dropna().unique():
        print(symbol)

        # Only the Close values are needed; select them without mutating `df`.
        closes = df.loc[df["Symbol"] == symbol, "Close"].to_numpy()

        columns[symbol] = _forecast_symbol(
            symbol, closes, split_percent, look_back, num_epochs, num_prediction
        )

    return pd.DataFrame(columns)
        

In [4]:
# Train one model per symbol and gather all forecasts into a single frame
forecasts = make_predictions(df=df)

ABBV


2022-02-22 10:13:21.517414: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-02-22 10:13:21.519069: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-02-22 10:13:21.519095: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2022-02-22 10:13:21.519128: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (kfdell): /proc/driver/nvidia/version does not exist
2022-02-22 10:13:21.519505: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropri

Epoch 1/100
113/113 - 2s - loss: 57020.9102
Epoch 2/100
113/113 - 1s - loss: 22365.8125
Epoch 3/100
113/113 - 1s - loss: 4077.9761
Epoch 4/100
113/113 - 1s - loss: 1648.9138
Epoch 5/100
113/113 - 1s - loss: 8423.1504
Epoch 6/100
113/113 - 1s - loss: 7396.3374
Epoch 7/100
113/113 - 1s - loss: 5195.9585
Epoch 8/100
113/113 - 1s - loss: 5109.0830
Epoch 9/100
113/113 - 1s - loss: 5034.0278
Epoch 10/100
113/113 - 1s - loss: 4955.4619
Epoch 11/100
113/113 - 1s - loss: 4873.5801
Epoch 12/100
113/113 - 1s - loss: 4788.6011
Epoch 13/100
113/113 - 1s - loss: 4701.1138
Epoch 14/100
113/113 - 0s - loss: 4611.0669
Epoch 15/100
113/113 - 1s - loss: 4516.8618
Epoch 16/100
113/113 - 1s - loss: 4402.5293
Epoch 17/100
113/113 - 1s - loss: 4244.3135
Epoch 18/100
113/113 - 1s - loss: 4105.2207
Epoch 19/100
113/113 - 1s - loss: 3997.5562
Epoch 20/100
113/113 - 1s - loss: 3903.2473
Epoch 21/100
113/113 - 1s - loss: 3813.7134
Epoch 22/100
113/113 - 1s - loss: 3726.6509
Epoch 23/100
113/113 - 1s - loss: 4086.

KeyboardInterrupt: 

In [None]:
# Optional quick look at every forecast column (needs `import matplotlib.pyplot as plt`):
#   forecasts.plot(x="Date", y=list(forecasts.columns[1:]), label=list(forecasts.columns[1:]))
#   plt.show()
# Save the forecasts next to the notebook (to_csv also writes the row index by default)
output_csv = "forecasts.csv"
forecasts.to_csv(output_csv)