# Model for Boeing
Sentiment is built upon the Reuters titles dataset.
Historical data is taken from yahoo finance

In [1]:
from tqdm import tqdm
import numpy as np
import pickle
import nltk
import string
import os
import pandas as pd
import matplotlib.pyplot as plt
from nltk.stem import PorterStemmer
from nltk.stem import SnowballStemmer
from nltk.corpus import stopwords
import re
import requests
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import time
import sys
import time
from tqdm._tqdm_notebook import tqdm_notebook
from keras.models import Sequential, load_model
from keras import layers
from keras.optimizers import RMSprop
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
from keras import optimizers
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import logging
from datetime import datetime, timedelta
from io import StringIO
import copy

Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`
Using TensorFlow backend.


## Scraping historical data from yahoo finance

In [2]:
class YahooFinanceHistory:
    timeout = 2
    crumb_link = 'https://finance.yahoo.com/quote/{0}/history?p={0}'
    crumble_regex = r'CrumbStore":{"crumb":"(.*?)"}'
    quote_link = 'https://query1.finance.yahoo.com/v7/finance/download/{quote}?period1={dfrom}&period2={dto}&interval=1d&events=history&crumb={crumb}'

    def __init__(self, symbol, days_back=7):
        self.symbol = symbol
        self.session = requests.Session()
        self.dt = timedelta(days=days_back)

#requesting crumb and cookie
    def get_crumb(self):
        response = self.session.get(self.crumb_link.format(self.symbol), timeout=self.timeout)
        response.raise_for_status()
        match = re.search(self.crumble_regex, response.text)
        if not match:
            raise ValueError('Could not get crumb from Yahoo Finance')
        else:
            self.crumb = match.group(1)

#requesting data
    def get_quote(self):
        if not hasattr(self, 'crumb') or len(self.session.cookies) == 0:
            self.get_crumb()
        now = datetime.utcnow()
        dateto = int(now.timestamp())
        datefrom = int((now - self.dt).timestamp())
        url = self.quote_link.format(quote=self.symbol, dfrom=datefrom, dto=dateto, crumb=self.crumb)
        response = self.session.get(url)
        response.raise_for_status()
        return pd.read_csv(StringIO(response.text), parse_dates=['Date'])

In [3]:
# extracting data about Boeing from 400 days back
df_v = YahooFinanceHistory('BA', days_back=4000).get_quote()

In [4]:
#sorting dates from the latest to earliest
df_v.sort_values(by='Date')

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2009-06-04,48.630001,50.910000,48.570000,50.570000,38.393864,7324600
1,2009-06-05,52.160000,53.259998,52.160000,52.650002,39.973049,13724100
2,2009-06-08,52.490002,53.330002,51.130001,52.830002,40.109699,7957400
3,2009-06-09,53.389999,53.389999,51.310001,52.349998,39.745274,7723800
4,2009-06-10,52.820000,53.200001,51.099998,52.299999,39.707298,6556200
...,...,...,...,...,...,...,...
2752,2020-05-11,130.919998,131.000000,127.949997,128.910004,128.910004,20018600
2753,2020-05-12,129.720001,133.119995,125.199997,125.220001,125.220001,22579000
2754,2020-05-13,125.000000,125.070000,119.330002,121.500000,121.500000,26002000
2755,2020-05-14,118.980003,122.610001,113.889999,122.519997,122.519997,37499200


In [5]:
df_v.dtypes

Date         datetime64[ns]
Open                float64
High                float64
Low                 float64
Close               float64
Adj Close           float64
Volume                int64
dtype: object

In [6]:
df_v

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2009-06-04,48.630001,50.910000,48.570000,50.570000,38.393864,7324600
1,2009-06-05,52.160000,53.259998,52.160000,52.650002,39.973049,13724100
2,2009-06-08,52.490002,53.330002,51.130001,52.830002,40.109699,7957400
3,2009-06-09,53.389999,53.389999,51.310001,52.349998,39.745274,7723800
4,2009-06-10,52.820000,53.200001,51.099998,52.299999,39.707298,6556200
...,...,...,...,...,...,...,...
2752,2020-05-11,130.919998,131.000000,127.949997,128.910004,128.910004,20018600
2753,2020-05-12,129.720001,133.119995,125.199997,125.220001,125.220001,22579000
2754,2020-05-13,125.000000,125.070000,119.330002,121.500000,121.500000,26002000
2755,2020-05-14,118.980003,122.610001,113.889999,122.519997,122.519997,37499200


## Sentiment for all the articles with "Microsoft" in the body of an article

In [7]:
# uploading file
df2 = pd.read_csv('df_BA.csv')

In [8]:
df2

Unnamed: 0.1,Unnamed: 0,Date,compound_mean
0,0,2011-07-06,-0.493900
1,1,2011-07-07,0.074200
2,2,2011-07-08,0.084700
3,3,2011-07-09,0.153100
4,4,2011-07-10,0.055833
...,...,...,...
1528,1528,2017-02-06,0.055833
1529,1529,2017-02-07,-0.273000
1530,1530,2017-02-10,0.055833
1531,1531,2017-02-13,-0.361200


In [9]:
#deleting column Unnamed
df2 = df2.drop(['Unnamed: 0'], axis=1)

In [10]:
df2

Unnamed: 0,Date,compound_mean
0,2011-07-06,-0.493900
1,2011-07-07,0.074200
2,2011-07-08,0.084700
3,2011-07-09,0.153100
4,2011-07-10,0.055833
...,...,...
1528,2017-02-06,0.055833
1529,2017-02-07,-0.273000
1530,2017-02-10,0.055833
1531,2017-02-13,-0.361200


In [11]:
df2.dtypes

Date              object
compound_mean    float64
dtype: object

In [12]:
#changing column Date type to datetime type
df2.Date=pd.to_datetime(df2['Date'])

In [13]:
#merging dataframe with historical data with dataframe with sentiments 
df3 = pd.merge(df_v,df2,on='Date')

In [14]:
df3

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,compound_mean
0,2011-07-06,74.129997,75.160004,74.010002,74.739998,59.897026,3757800,-0.493900
1,2011-07-07,75.330002,76.199997,74.849998,75.989998,60.898773,4976900,0.074200
2,2011-07-08,75.580002,75.580002,74.570000,75.070000,60.161488,4051200,0.084700
3,2011-07-11,74.169998,74.730003,73.000000,73.349998,58.783066,4379000,-0.157993
4,2011-07-12,73.620003,73.860001,71.790001,71.930000,57.645065,5773000,0.360000
...,...,...,...,...,...,...,...,...
1272,2017-02-06,162.419998,164.080002,162.380005,163.979996,152.032944,3110500,0.055833
1273,2017-02-07,165.000000,167.419998,164.869995,166.500000,154.369339,4243200,-0.273000
1274,2017-02-10,165.250000,166.449997,164.470001,166.229996,155.444748,2689700,0.055833
1275,2017-02-13,166.449997,169.070007,166.350006,168.029999,157.127945,3765000,-0.361200


# Machine learning for prediction of label for the next day 

In [15]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import sklearn

In [17]:
#deepcopying dataframe, so there would be no need to run everything from the beggining
df = copy.deepcopy(df3)

In [18]:
#label indicates wheter the price will go up(1) or down(0) next day
def add_label(dfi):
    idx = len(dfi.columns)
    new_col = np.where(dfi['Close'] >= dfi['Close'].shift(1), 1, 0)  
    dfi.insert(loc=idx, column='Label', value=new_col)
    dfi = dfi.fillna(0)
    df['Label'] =  df['Label'].shift(-1, axis = 0)
    df.drop(df.index[len(df)-1], inplace = True)

In [20]:
add_label(df)

In [21]:
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,compound_mean,Label
0,2011-07-06,74.129997,75.160004,74.010002,74.739998,59.897026,3757800,-0.493900,1.0
1,2011-07-07,75.330002,76.199997,74.849998,75.989998,60.898773,4976900,0.074200,0.0
2,2011-07-08,75.580002,75.580002,74.570000,75.070000,60.161488,4051200,0.084700,0.0
3,2011-07-11,74.169998,74.730003,73.000000,73.349998,58.783066,4379000,-0.157993,0.0
4,2011-07-12,73.620003,73.860001,71.790001,71.930000,57.645065,5773000,0.360000,1.0
...,...,...,...,...,...,...,...,...,...
1271,2017-02-03,162.990005,163.559998,162.309998,162.399994,150.568100,2981700,0.055833,1.0
1272,2017-02-06,162.419998,164.080002,162.380005,163.979996,152.032944,3110500,0.055833,1.0
1273,2017-02-07,165.000000,167.419998,164.869995,166.500000,154.369339,4243200,-0.273000,0.0
1274,2017-02-10,165.250000,166.449997,164.470001,166.229996,155.444748,2689700,0.055833,1.0


In [22]:
df.dtypes

Date             datetime64[ns]
Open                    float64
High                    float64
Low                     float64
Close                   float64
Adj Close               float64
Volume                    int64
compound_mean           float64
Label                   float64
dtype: object

In [24]:
array = df.values

In [25]:
#creating training and testing datasets
X = array[:,1:8]
Y = array[:,8]

In [26]:
#standardising features, fitting and transforming X
X = sklearn.preprocessing.MinMaxScaler().fit_transform(X)

In [27]:
#casting Y to data type integer
Y = Y.astype('int')

In [28]:
print(X[0])
print(df.columns[1:8])

[0.15129418 0.14381154 0.16181233 0.15513382 0.12252169 0.06436869
 0.1636901 ]
Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'compound_mean'], dtype='object')


In [29]:
#choosing best features for the model
test = SelectKBest(score_func=chi2, k=5)
fit = test.fit(X, Y)
np.set_printoptions(precision=3)
print(fit.scores_)
features = fit.transform(X)
print(features[0:5,:])

[0.05  0.05  0.047 0.046 0.017 0.085 0.235]
[[0.151 0.144 0.162 0.064 0.164]
 [0.162 0.153 0.169 0.092 0.521]
 [0.165 0.148 0.167 0.071 0.528]
 [0.152 0.14  0.153 0.079 0.375]
 [0.147 0.132 0.142 0.111 0.701]]


In [30]:
#Open, High, Low, Volume and compound mean give the most information
features

array([[0.151, 0.144, 0.162, 0.064, 0.164],
       [0.162, 0.153, 0.169, 0.092, 0.521],
       [0.165, 0.148, 0.167, 0.071, 0.528],
       ...,
       [0.979, 0.977, 0.979, 0.076, 0.303],
       [0.982, 0.968, 0.975, 0.04 , 0.509],
       [0.993, 0.992, 0.992, 0.065, 0.247]])

In [31]:
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from sklearn.model_selection import train_test_split

#building and training a model
X_train, X_test, y_train, y_test = train_test_split(features, Y, test_size=0.25)
model = Sequential()
model.add(Dense(16, input_shape=(5,) ))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                       patience=100, min_delta=0.0001, restore_best_weights = True)
history = model.fit(X_train, y_train, epochs=100,
                    validation_data= (X_test,y_test),
                              callbacks=[es])

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 957 samples, validate on 319 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100


Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [32]:
#evaluating the model
model.evaluate(X_test, y_test)



[0.6973692877539273, 0.5517241358757019]

# LSTM AND GRU METHOD

In [28]:
normalized_df1 = copy.deepcopy(df3)

In [29]:
normalized_df1 = normalized_df1[['Open','High','Close','Low','Volume','compound_mean']]

In [30]:
#normalizing function
def normalized_df(df):
    normalized_df=(df-df.mean())/df.std()
    return normalized_df

In [31]:
#for later
normalized_df2 = copy.deepcopy(normalized_df1)
normalized_df3 = copy.deepcopy(normalized_df1)

In [32]:
normalized_df2

Unnamed: 0,Open,High,Close,Low,Volume,compound_mean
0,74.129997,75.160004,74.739998,74.010002,3757800,-0.493900
1,75.330002,76.199997,75.989998,74.849998,4976900,0.074200
2,75.580002,75.580002,75.070000,74.570000,4051200,0.084700
3,74.169998,74.730003,73.349998,73.000000,4379000,-0.157993
4,73.620003,73.860001,71.930000,71.790001,5773000,0.360000
...,...,...,...,...,...,...
1272,162.419998,164.080002,163.979996,162.380005,3110500,0.055833
1273,165.000000,167.419998,166.500000,164.869995,4243200,-0.273000
1274,165.250000,166.449997,166.229996,164.470001,2689700,0.055833
1275,166.449997,169.070007,168.029999,166.350006,3765000,-0.361200


In [33]:
mean = normalized_df1.mean(axis = 0)
normalized_df1 -= mean
std = normalized_df1.std(axis=0)
normalized_df1 /= std

In [34]:
#adding label: up or down or steady
def add_label(df):
    idx = len(df.columns)
    new_col = np.where(df['Close'] >= df['Close'].shift(1), 1, 0)  
    df.insert(loc=idx, column='Label', value=new_col)
    df = df.fillna(0)

In [35]:
add_label(normalized_df1)

In [36]:
normalized_df1 = normalized_df1.values

In [37]:
normalized_df1

array([[-1.28889326, -1.27782976, -1.26905769, ..., -0.33699085,
        -1.92943889,  0.        ],
       [-1.24896961, -1.24346657, -1.22752242, ...,  0.13029052,
         0.02875619,  1.        ],
       [-1.24065222, -1.26395229, -1.25809231, ..., -0.22453055,
         0.06494885,  0.        ],
       ...,
       [ 1.74262958,  1.73855101,  1.77099167, ..., -0.74639388,
        -0.03455252,  0.        ],
       [ 1.78255296,  1.82512071,  1.83080256, ..., -0.33423109,
        -1.47203265,  1.        ],
       [ 1.82413991,  1.81619931,  1.84641985, ..., -0.78832693,
        -0.03455252,  1.        ]])

In [38]:
from keras.utils import to_categorical

#defining our generator
def generator(data, lookback, delay, min_index, max_index,
              shuffle=False, batch_size=32, step=5):
    if max_index is None:
        max_index = len(data) - delay - 1
    i = min_index + lookback
    while 1:
        if shuffle:
            rows = np.random.randint(
                min_index + lookback, max_index, size=batch_size)
        else:
            if i + batch_size >= max_index:
                i = min_index + lookback
            rows = np.arange(i, min(i + batch_size, max_index))
            i += len(rows)
        samples = np.zeros((len(rows),
                           lookback // step,
                           data.shape[-1]))
        targets = np.zeros((len(rows),))
        for j, row in enumerate(rows):
            indices = range(rows[j] - lookback, rows[j], step)
            samples[j] = data[indices]
            targets[j] = data[rows[j] + delay][-1]
        yield samples, to_categorical(targets)

In [39]:
lookback = 30
step = 10
delay = 1
batch_size = 32

In [40]:
#Splitting data into train, test and validation set
train_gen = generator(normalized_df1,
                      lookback=lookback,
                      delay=delay,
                      min_index=0,
                      max_index=round(0.6*len(normalized_df1)),
                      shuffle=False,
                      step=step,
                      batch_size=batch_size)
val_gen = generator(normalized_df1,
                    lookback=lookback,
                    delay=delay,
                    min_index=round(0.6*len(normalized_df1))+1,
                    max_index=round(0.8*len(normalized_df1)),
                    step=step,
                    batch_size=batch_size)
test_gen = generator(normalized_df1,
                     lookback=lookback,
                     delay=delay,
                     min_index=round(0.8*len(normalized_df1))+1,
                     max_index=None,
                     step=step,
                     batch_size=batch_size)

val_steps = (round(0.8*len(normalized_df1)) - round(0.6*len(normalized_df1))+1 - lookback) # how many steps to draw from val_gen in order to see the entire validation set
test_steps = (len(normalized_df1) - round(0.8*len(normalized_df1))+1 - lookback)
# How many steps to draw from test_gen in order to see the entire test set

In [41]:
#Building and training LSTM model
model = Sequential()
model.add(LSTM(100, return_sequences=True,
                    input_shape=(None, normalized_df1.shape[-1]),
                    kernel_initializer='random_uniform'))
model.add(Dropout(0.4))
model.add(LSTM(60, dropout=0.0, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(20,activation='relu'))
model.add(layers.Dense(2, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer=RMSprop(),metrics=['accuracy'])

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                       patience=100, min_delta=0.0001, restore_best_weights = True)

history = model.fit_generator(train_gen,
                              steps_per_epoch=2,
                              epochs=50,
                              validation_data=val_gen,
                              validation_steps=val_steps,
                              callbacks=[es])

Instructions for updating:
If using Keras pass *_constraint arguments to layers.

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [46]:
test_loss, test_acc = model.evaluate_generator(test_gen, steps=5)
print('test acc:', test_acc)
print("test_loss:", test_loss)

ValueError: `steps=None` is only valid for a generator based on the `keras.utils.Sequence` class. Please specify `steps` or use the `keras.utils.Sequence` class.

In [43]:
test_loss, test_acc = model.evaluate_generator(test_gen, steps=4)
print('test acc:', test_acc)
print("test_loss:", test_loss)

test acc: 0.5546875
test_loss: 0.247816264629364


In [None]:
#Building and training a model with GRU
model = Sequential()
model.add(layers.GRU(32,
                     dropout=0.3,
                     recurrent_dropout=0.2,
                     return_sequences=True,
                     input_shape=(None, normalized_df1.shape[-1])))
model.add(layers.GRU(64, activation='relu',
                     dropout=0.3,
                     recurrent_dropout=0.1))
model.add(layers.Dense(2, activation='softmax'))
model.compile(optimizer=RMSprop(), loss='mean_squared_error', metrics=['accuracy'])
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                       patience=50, min_delta=0.0001, restore_best_weights = True)
    
history = model.fit_generator(train_gen,
                              steps_per_epoch=2,
                              epochs=250,
                              validation_data=val_gen,
                              validation_steps=val_steps,
                              callbacks=[es])

In [None]:
#Evaluating the model
test_loss, test_acc = model.evaluate_generator(test_gen, steps=3)
print('test acc:', test_acc)
print("test_loss:", test_loss)

## Without sentiment

In [None]:
#normalizing data
normalized_df1 = normalized_df2

mean = normalized_df1.mean(axis = 0)
normalized_df1 -= mean
std = normalized_df1.std(axis=0)
normalized_df1 /= std
#adding label: up/down or steady
def add_label(df):
    idx = len(df.columns)
    new_col = np.where(df['Close'] >= df['Close'].shift(1), 1, 0)  
    df.insert(loc=idx, column='Label', value=new_col)
    df = df.fillna(0)
    
add_label(normalized_df1)

In [None]:
#applying function 
del normalized_df1['compound_mean']

In [None]:
normalized_df1

In [None]:
normalized_df1 = normalized_df1.values

In [None]:
#Splitting into tain validation and test set
train_gen = generator(normalized_df1,
                      lookback=lookback,
                      delay=delay,
                      min_index=0,
                      max_index=round(0.6*len(normalized_df1)),
                      shuffle=False,
                      step=step,
                      batch_size=batch_size)
val_gen = generator(normalized_df1,
                    lookback=lookback,
                    delay=delay,
                    min_index=round(0.6*len(normalized_df1))+1,
                    max_index=round(0.8*len(normalized_df1)),
                    step=step,
                    batch_size=batch_size)
test_gen = generator(normalized_df1,
                     lookback=lookback,
                     delay=delay,
                     min_index=round(0.8*len(normalized_df1))+1,
                     max_index=None,
                     step=step,
                     batch_size=batch_size)

val_steps = (round(0.8*len(normalized_df1)) - round(0.6*len(normalized_df1))+1 - lookback) # how many steps to draw from val_gen in order to see the entire validation set
test_steps = (len(normalized_df1) - round(0.8*len(normalized_df1))+1 - lookback)
# How many steps to draw from test_gen in order to see the entire test set

In [None]:
#Bilding and training LSTM model
model = Sequential()
model.add(LSTM(100, return_sequences=True,
                    input_shape=(None, normalized_df1.shape[-1]),
                    kernel_initializer='random_uniform'))
model.add(Dropout(0.4))
model.add(LSTM(60, dropout=0.0, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(20,activation='relu'))
model.add(layers.Dense(2, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer=RMSprop(),metrics=['accuracy'])

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                       patience=50, min_delta=0.0001, restore_best_weights = True)

history = model.fit_generator(train_gen,
                              steps_per_epoch=2,
                              epochs=200,
                              validation_data=val_gen,
                              validation_steps=val_steps,
                              callbacks=[es])

In [None]:
#evaluating the model
test_loss, test_acc = model.evaluate_generator(test_gen, steps=4)
print('test acc:', test_acc)
print("test_loss:", test_loss)

In [None]:
#Bilding and training GRU model
model = Sequential()
model.add(layers.GRU(32,
                     dropout=0.3,
                     recurrent_dropout=0.2,
                     return_sequences=True,
                     input_shape=(None, normalized_df1.shape[-1])))
model.add(layers.GRU(64, activation='relu',
                     dropout=0.3,
                     recurrent_dropout=0.2))
model.add(layers.Dense(2, activation='softmax'))
model.compile(optimizer=RMSprop(), loss='mean_squared_error', metrics=['accuracy'])
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                       patience=100, min_delta=0.0001, restore_best_weights = True)
    
history = model.fit_generator(train_gen,
                              steps_per_epoch=2,
                              epochs=500,
                              validation_data=val_gen,
                              validation_steps=val_steps,
                              callbacks=[es])

In [None]:
#evaluating the model
test_loss, test_acc = model.evaluate_generator(test_gen, steps=4)
print('test acc:', test_acc)
print("test_loss:", test_loss)