## Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, GRU, RNN, Conv1D
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)
plt.rcParams.update({'figure.figsize': (16, 9)})

## Load data

In [2]:
data = pd.read_csv('../data/processed/company_id_5_processed.csv', parse_dates=True)
data = data[['txn_date', 'open', 'high', 'low', 'close','vol', 
#              'month', 'day', 'day_of_month'
            ]].sort_values(by='txn_date')
data.head()

Unnamed: 0,txn_date,open,high,low,close,vol
0,2010-09-20,13.0,13.1,12.5,12.6,10323500.0
1,2010-09-21,12.8,13.2,12.7,12.9,3327500.0
2,2010-09-22,13.0,13.4,12.9,13.0,2765500.0
3,2010-09-23,13.3,13.4,13.0,13.0,2329000.0
4,2010-09-26,13.0,13.2,12.8,12.8,1608500.0


## Preprocessing

In [3]:
data['txn_date'] = pd.to_datetime(data['txn_date'])
data.set_index('txn_date', inplace=True, drop=True)

In [4]:
data['close_roc'] = data['close'].pct_change()
data['close_log_roc'] = np.log(1+data['close_roc'])

In [5]:
data.dropna(inplace=True)
data.head()

Unnamed: 0_level_0,open,high,low,close,vol,close_roc,close_log_roc
txn_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-09-21,12.8,13.2,12.7,12.9,3327500.0,0.02381,0.02353
2010-09-22,13.0,13.4,12.9,13.0,2765500.0,0.007752,0.007722
2010-09-23,13.3,13.4,13.0,13.0,2329000.0,0.0,0.0
2010-09-26,13.0,13.2,12.8,12.8,1608500.0,-0.015385,-0.015504
2010-09-27,12.7,13.0,12.6,12.6,1332000.0,-0.015625,-0.015748


In [6]:
def custom_ts_data_prep(dataset, target, start, end, window, horizon):
    X = []
    y = []
    start = start + window
    if end is None:
        end = len(dataset) - horizon
    for i in range(start, end):
        indices = range(i-window, i)
        X.append(dataset[indices])
        indicey = range(i+1, i+1+horizon)
        y.append(target[indicey])
    return np.array(X), np.array(y)

In [7]:
x_scaler = MinMaxScaler(feature_range=(-1,1))
y_scaler = MinMaxScaler(feature_range=(0,1))
data_x = x_scaler.fit_transform(data)
data_y = x_scaler.fit_transform(data[['close_roc']])

In [8]:
data_x[:2], data_y[:2]

(array([[ 0.89361702,  0.85858586,  0.93406593,  0.97802198, -0.58680446,
         -0.0781963 , -0.00424251],
        [ 0.93617021,  0.8989899 ,  0.97802198,  1.        , -0.65659138,
         -0.18356606, -0.11090363]]),
 array([[-0.0781963 ],
        [-0.18356606]]))

In [9]:
hist_window = 48
horizon = 10
TRAIN_SPLIT = int(len(data_x)*0.8)
x_train_multi, y_train_multi = custom_ts_data_prep(data_x, data_y, 0, TRAIN_SPLIT, hist_window, horizon)
x_val_multi, y_val_multi= custom_ts_data_prep(data_x, data_y, TRAIN_SPLIT, None, hist_window, horizon)

In [10]:
assert(x_train_multi.shape[0]  == y_train_multi.shape[0])
assert(x_val_multi.shape[0]  == y_val_multi.shape[0])

In [11]:
BATCH_SIZE = 256
BUFFER_SIZE = 150
train_data = tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
train_data = train_data.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
val_data = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
val_data = val_data.batch(BATCH_SIZE).repeat()

In [12]:
lstm_model = Sequential([
    LSTM(40, input_shape=x_train_multi.shape[-2:], return_sequences=True),
    LSTM(units=20,return_sequences=True),
    LSTM(units=15),
    Dense(units=1)
])
lstm_model.compile(optimizer='adam', loss='mse')

In [13]:
model_path = r'../models/LSTM_Multivariate.h5'

In [None]:
EVALUATION_INTERVAL = 100
EPOCHS = 150
history = lstm_model.fit(train_data,
    epochs=EPOCHS,steps_per_epoch=EVALUATION_INTERVAL,validation_data=val_data, validation_steps=50,verbose =1,
    callbacks =[tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10,
    verbose=1, mode='min'),tf.keras.callbacks.ModelCheckpoint(model_path,monitor='val_loss', save_best_only=True, mode='min',
    verbose=0)])

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
  3/100 [..............................] - ETA: 6s - loss: 0.0163

In [None]:
model = tf.keras.models.load_model(model_path)

In [None]:
model.summary()