In [2]:
!pip install yfinance



Setting up the Environment

In [3]:
import numpy as np
import pandas as pd
import yfinance as yf
import tensorflow as tf

Fetching the Stock Market Data

In [57]:
# Daily GOOGL history from 2018 onwards. auto_adjust is set explicitly so the
# separate 'Adj Close' column that is plotted further down is always present,
# regardless of the library's default for this flag.
data = yf.download("GOOGL", start = "2018-01-01", interval = "1d", auto_adjust = False)

[*********************100%%**********************]  1 of 1 completed


Understanding the Stock Market Data

In [58]:
# (rows, columns) of the downloaded frame
data.shape

(1446, 6)

In [59]:
# First three rows of the frame
data.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,52.651001,53.799,52.651001,53.6605,53.6605,31766000
2018-01-03,53.696499,54.805,53.671501,54.576,54.576,31318000
2018-01-04,54.8545,55.203999,54.713001,54.787998,54.787998,26052000


Understanding the Trends Within Data

In [60]:
# Order rows by date, then keep only the first row for any repeated date.
data = (
    data
    .sort_index()
    .pipe(lambda frame: frame.loc[~frame.index.duplicated(keep = 'first')])
)

In [61]:
# Last three rows of the frame
data.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-09-27,128.570007,130.899994,128.570007,130.539993,130.539993,22746500
2023-09-28,129.839996,133.300003,129.789993,132.309998,132.309998,22491400
2023-09-29,133.279999,134.050003,130.5,130.860001,130.860001,15846132


In [62]:
# Number of missing values per column
data.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [63]:
# Summary statistics for every numeric column
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,1446.0,1446.0,1446.0,1446.0,1446.0,1446.0
mean,89.26881,90.295226,88.286914,89.31123,89.31123,35098010.0
std,30.676279,30.991971,30.338871,30.65683,30.65683,15747480.0
min,49.216,50.605999,48.882999,49.233501,49.233501,9312000.0
25%,59.785001,60.28675,59.198874,59.818377,59.818377,25151000.0
50%,86.287251,87.252499,85.323002,86.344498,86.344498,31172000.0
75%,116.098125,117.868999,114.819128,116.508877,116.508877,40087500.0
max,151.25,151.546494,148.899002,149.838501,149.838501,133178000.0


In [64]:
import plotly.graph_objects as go

# Closing price over time.
close_trace = go.Scatter(x = data.index, y = data['Close'], mode = 'lines')
fig = go.Figure()
fig.add_trace(close_trace)
fig.update_layout(xaxis_title = 'Date', yaxis_title = 'Close', width = 900, height = 500)
fig.show()

In [12]:
# Traded volume over time (the y-axis label now names the plotted column).
fig = go.Figure()
fig.add_trace(go.Scatter(x = data.index, y = data['Volume'], mode = 'lines'))
fig.update_layout(height = 500, width = 900, xaxis_title = 'Date', yaxis_title = 'Volume')
fig.show()

In [13]:
# Opening price over time (the y-axis label now names the plotted column).
fig = go.Figure()
fig.add_trace(go.Scatter(x = data.index, y = data['Open'], mode = 'lines'))
fig.update_layout(height = 500, width = 900, xaxis_title = 'Date', yaxis_title = 'Open')
fig.show()

In [14]:
# Daily high over time (the y-axis label now names the plotted column).
fig = go.Figure()
fig.add_trace(go.Scatter(x = data.index, y = data['High'], mode = 'lines'))
fig.update_layout(height = 500, width = 900, xaxis_title = 'Date', yaxis_title = 'High')
fig.show()

In [15]:
# Daily low over time (the y-axis label now names the plotted column).
fig = go.Figure()
fig.add_trace(go.Scatter(x = data.index, y = data['Low'], mode = 'lines'))
fig.update_layout(height = 500, width = 900, xaxis_title = 'Date', yaxis_title = 'Low')
fig.show()

In [16]:
import plotly.graph_objects as go

# Adjusted closing price over time (the y-axis label now names the plotted column).
fig = go.Figure()
fig.add_trace(go.Scatter(x = data.index, y = data['Adj Close'], mode = 'lines'))
fig.update_layout(height = 500, width = 900, xaxis_title = 'Date', yaxis_title = 'Adj Close')
fig.show()

Data Preparation

In [17]:
from sklearn.preprocessing import MinMaxScaler
import pickle
from tqdm.notebook import tnrange

# Choosing Close and Volume because Close, Open, High and Adj Close have very similar characteristics
data = data[['Close', 'Volume']]

In [18]:
# Number of rows dated on or after 2020-09-01; these form the held-out tail.
test_length = int((data.index >= '2020-09-01').sum())

In [19]:
def CreateFeatures_and_Targets(data, feature_length, target_column = 'Close'):
  """Turn a frame into sliding-window features and next-step targets.

  Window ``i`` holds rows ``i .. i + feature_length - 1`` of every column in
  ``data``; its target is the ``target_column`` value of row
  ``i + feature_length`` (the row right after the window).

  Args:
    data: DataFrame whose columns are the features; must contain ``target_column``.
    feature_length: number of consecutive rows in each window.
    target_column: column the targets are read from (defaults to 'Close').

  Returns:
    ``(X, Y)`` where ``X`` has shape ``(n_windows, feature_length, n_columns)``
    and ``Y`` has shape ``(n_windows,)``, with
    ``n_windows = max(len(data) - feature_length, 0)``. When the frame is too
    short for a single window the arrays are empty but keep those shapes.
  """
  # Convert to NumPy once instead of slicing the DataFrame on every iteration.
  values = data.values
  targets = data[target_column].values

  n_windows = max(len(data) - feature_length, 0)
  X = np.empty((n_windows, feature_length, values.shape[1]), dtype = values.dtype)
  Y = np.empty((n_windows,), dtype = targets.dtype)

  for i in tnrange(n_windows):
    X[i] = values[i:i+feature_length]
    Y[i] = targets[i+feature_length]

  return X,Y

In [20]:
# Build windows of 32 consecutive rows; each target is the Close value of the row right after its window.
X, Y = CreateFeatures_and_Targets(data,32)

  0%|          | 0/1414 [00:00<?, ?it/s]

In [21]:
# Shapes of the window array and the target vector
X.shape, Y.shape

((1414, 32, 2), (1414,))

In [22]:
# Chronological split: the last `test_length` windows are held out, everything before is training data.
Xtrain, Ytrain = X[:-test_length], Y[:-test_length]
Xtest, Ytest = X[-test_length:], Y[-test_length:]

In [23]:
class Scaler():
  """Min-max scaling for 3-D arrays, with one fitted scaler per slice along the last axis.

  For every index ``i`` of the last axis, the 2-D slice ``X[:, :, i]`` is
  handed to its own ``MinMaxScaler``. The fitted scalers are kept in
  ``self.scalers`` in axis order so that ``transform`` can reuse them.
  """

  def __init__(self):
    # One fitted MinMaxScaler per index of the last axis, filled by fit_transform.
    self.scalers = []

  def fit_transform(self, X):
    """Fit a fresh scaler per last-axis slice and return the scaled copy.

    Args:
      X: array indexable as ``X[:, :, i]`` (at least 3-D).

    Returns:
      A new float array of the same shape; the input array is left untouched.
    """
    # Work on a float copy: callers often pass views of a larger array, and
    # writing scaled values back in place would silently alter that array.
    X = np.array(X, dtype = float)
    # Start from scratch so that a refit never leaves stale scalers in front
    # of the new ones (transform looks scalers up by position).
    self.scalers = []
    for i in range(X.shape[2]):
      scaler = MinMaxScaler()
      X[:, :, i] = scaler.fit_transform(X[:, :, i])
      self.scalers.append(scaler)
    return X

  def transform(self, X):
    """Scale ``X`` with the scalers from the last ``fit_transform`` call.

    Args:
      X: array indexable as ``X[:, :, i]`` with no more last-axis slices than
        were seen during fitting.

    Returns:
      A new float array of the same shape; the input array is left untouched.
    """
    X = np.array(X, dtype = float)
    for i in range(X.shape[2]):
      X[:, :, i] = self.scalers[i].transform(X[:, :, i])
    return X


In [24]:
# Fit the feature scalers on the training windows, then apply the same fitted scalers to the test windows.
Feature_scaler = Scaler()
Xtrain = Feature_scaler.fit_transform(Xtrain)
Xtest = Feature_scaler.transform(Xtest)

In [25]:
# Targets are reshaped to a single column, scaled with a scaler fitted on the training targets only.
Target_scaler = MinMaxScaler()
Target_scaler.fit(Ytrain.reshape(-1,1))
Ytrain = Target_scaler.transform(Ytrain.reshape(-1,1))
Ytest = Target_scaler.transform(Ytest.reshape(-1,1))


Model Building

In [78]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# Keep only the weights of the epoch with the lowest validation loss.
save_best = ModelCheckpoint(
    "best_weights.h5",
    monitor = 'val_loss',
    save_weights_only = True,
    save_best_only = True,
)

# Multiply the learning rate by 0.25 after 5 epochs without validation improvement, down to 1e-7.
reduce_lr = ReduceLROnPlateau(
    monitor = 'val_loss',
    patience = 5,
    factor = 0.25,
    min_lr = 1e-7,
    verbose = 1,
)



In [79]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Bidirectional

# Bidirectional LSTM -> LSTM -> two dropout-regularised dense layers -> one linear output.
model = Sequential([
    Bidirectional(LSTM(512, return_sequences = True, recurrent_dropout = 0.1, input_shape = (32,2))),
    LSTM(256, recurrent_dropout = 0.1),
    Dropout(0.3),
    Dense(64, activation = 'elu'),
    Dropout(0.3),
    Dense(32, activation = 'elu'),
    Dense(1, activation = 'linear'),
])





In [80]:
# Plain SGD with a small step size, minimising mean squared error.
optimizer = tf.keras.optimizers.SGD(learning_rate = 2e-3)
model.compile(optimizer = optimizer, loss = 'mse')

In [29]:
# Samples are fed one at a time and in their original order (no shuffling);
# the held-out split doubles as validation data for the two callbacks.
fit_options = dict(
    epochs = 10,
    batch_size = 1,
    verbose = 1,
    shuffle = False,
    validation_data = (Xtest, Ytest),
    callbacks = [reduce_lr, save_best],
)
history = model.fit(Xtrain, Ytrain, **fit_options)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [30]:
# Restore the weights written by the save_best checkpoint callback
model.load_weights("best_weights.h5")


In [31]:
# Predictions for the held-out windows (still in scaled target units)
Predictions = model.predict(Xtest)



In [32]:
# Map both the predictions and the scaled test targets back to price units.
Predictions, Actual = (
    Target_scaler.inverse_transform(scaled)
    for scaled in (Predictions, Ytest)
)

In [33]:
# Drop the length-1 column axis so both series are 1-D.
Predictions = Predictions.squeeze(axis = 1)
Actual = Actual.squeeze(axis = 1)

In [37]:
# Predicted vs. actual values over the last `test_length` dates.
test_dates = data.index[-test_length:]
fig = go.Figure()
for label, series in (('Predictions', Predictions), ('Actual', Actual)):
  fig.add_trace(go.Scatter(x = test_dates, y = series, mode = 'lines', name = label))
fig.show()

In [38]:
# Stitch the train and test splits back together along the sample axis.
Total_features = np.concatenate([Xtrain, Xtest], axis = 0)
Total_targets = np.concatenate([Ytrain, Ytest], axis = 0)

In [40]:
# Predict over every window (train + test) and map both series back to price units.
Predictions = model.predict(Total_features)
Predictions = Target_scaler.inverse_transform(Predictions)
Actual = Target_scaler.inverse_transform(Total_targets)
Predictions = np.squeeze(Predictions, axis = 1)
Actual = np.squeeze(Actual, axis = 1)

# One prediction exists per window, and the windows end on the last rows of `data`,
# so the matching dates are the last len(Predictions) index entries
# (not just the last `test_length` ones, which only cover the test split).
all_dates = data.index[-len(Predictions):]

fig = go.Figure()
fig.add_trace(go.Scatter(x = all_dates, y = Predictions, mode = 'lines', name = 'Predictions'))
fig.add_trace(go.Scatter(x = all_dates, y = Actual, mode = 'lines',name = 'Actual'))
fig.show()



In [53]:
# The previous model captured the overall pattern but missed the finer,
# day-to-day ups and downs, so we add another feature: the 5-period daily RSI
# of the close price, fetched from Alpha Vantage.

import os
import requests

# Read the API key from the environment rather than storing it in the notebook.
# NOTE: the key that used to be written in this cell has been exposed and should be revoked.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY')
if not api_key:
  raise RuntimeError('Set the ALPHAVANTAGE_API_KEY environment variable before running this cell.')

response = requests.get(
    'https://www.alphavantage.co/query',
    params = {
        'function': 'RSI',
        'symbol': 'GOOGL',
        'interval': 'daily',
        'time_period': 5,
        'series_type': 'close',
        'apikey': api_key,
    },
    timeout = 30,
)
# Fail early on HTTP errors instead of trying to parse an error page.
response.raise_for_status()

response = response.json()

# If the expected key is absent (for example because an error or usage note
# was returned instead of data), stop with a readable message.
if 'Technical Analysis: RSI' not in response:
  raise RuntimeError(f'Alpha Vantage did not return RSI data: {response}')

rsi_data = pd.DataFrame.from_dict(response['Technical Analysis: RSI'], orient = 'index')

# The dict keys are date strings; convert them to real timestamps and order
# them so the index lines up with a date-indexed price frame.
rsi_data.index = pd.to_datetime(rsi_data.index)
rsi_data = rsi_data.sort_index()

# Keep the same period as the price data and store RSI as floats
# (astype on the filtered frame avoids assigning into a slice).
rsi_data = rsi_data.loc[rsi_data.index >= '2018-01-01'].astype({'RSI': np.float64})



In [70]:
# Inner-join RSI onto the price rows by date. Dropping any RSI column left by
# an earlier run keeps the cell re-runnable (otherwise a second run would
# produce RSI_x / RSI_y columns).
data = data.drop(columns = ['RSI'], errors = 'ignore').merge(rsi_data, left_index = True, right_index = True, how = 'inner')

In [72]:
# Re-run the windowing, scaling, training and evaluation steps on the data that now includes RSI



In [73]:
# The inner merge with RSI can drop rows, so recount the rows dated on or after
# 2020-09-01 instead of reusing the count taken before the merge.
test_length = int((pd.to_datetime(data.index) >= pd.Timestamp('2020-09-01')).sum())

X, Y = CreateFeatures_and_Targets(data,32)
Xtrain, Xtest, Ytrain, Ytest = X[:-test_length], X[-test_length:], Y[:-test_length], Y[-test_length:]

  0%|          | 0/1413 [00:00<?, ?it/s]

In [81]:
# Re-scale the RSI-augmented windows; every scaler is fitted on the training split only.
Feature_scaler = Scaler()
Xtrain = Feature_scaler.fit_transform(Xtrain)
Xtest = Feature_scaler.transform(Xtest)

Target_scaler = MinMaxScaler()
Ytrain = Target_scaler.fit_transform(Ytrain.reshape(-1,1))
Ytest = Target_scaler.transform(Ytest.reshape(-1,1))

# The windows now have one more feature than the network fitted earlier was
# given, so build and compile a fresh network (same architecture) whose input
# shape is taken from the data instead of refitting the old one.
model = Sequential()
model.add(Bidirectional(LSTM(512, return_sequences = True, recurrent_dropout = 0.1, input_shape = Xtrain.shape[1:])))
model.add(LSTM(256, recurrent_dropout = 0.1))
model.add(Dropout(0.3))
model.add(Dense(64, activation = 'elu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation = 'elu'))
model.add(Dense(1, activation = 'linear'))

# A new optimizer instance goes with the new network.
optimizer = tf.keras.optimizers.SGD(learning_rate = 0.002)
model.compile(loss = 'mse', optimizer = optimizer)

# New callback instances as well: a reused checkpoint callback would still
# remember the best val_loss of the earlier run and could skip saving this
# run's weights.
save_best = ModelCheckpoint("best_weights.h5", monitor = 'val_loss', save_best_only = True, save_weights_only = True)
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.25, patience = 5, min_lr = 0.0000001, verbose = 1)

history = model.fit(Xtrain, Ytrain,
                    epochs = 10,
                    batch_size = 1,
                    verbose = 1,
                    shuffle  = False,
                    validation_data = (Xtest, Ytest),
                    callbacks = [reduce_lr, save_best])

# Restore the weights of the best validation epoch of this run.
model.load_weights("best_weights.h5")

# --- Held-out split: predicted vs. actual, in price units ---
Predictions = model.predict(Xtest)

Predictions = Target_scaler.inverse_transform(Predictions)
Actual = Target_scaler.inverse_transform(Ytest)

Predictions = np.squeeze(Predictions, axis = 1)
Actual = np.squeeze(Actual, axis = 1)

test_dates = data.index[-len(Predictions):]

fig = go.Figure()
fig.add_trace(go.Scatter(x = test_dates, y = Predictions, mode = 'lines', name = 'Predictions'))
fig.add_trace(go.Scatter(x = test_dates, y = Actual, mode = 'lines',name = 'Actual'))
fig.show()

# --- Whole series (train + test): predicted vs. actual, in price units ---
Total_features = np.concatenate((Xtrain, Xtest), axis = 0)
Total_targets = np.concatenate((Ytrain, Ytest), axis = 0)

Predictions = model.predict(Total_features)
Predictions = Target_scaler.inverse_transform(Predictions)
Actual = Target_scaler.inverse_transform(Total_targets)
Predictions = np.squeeze(Predictions, axis = 1)
Actual = np.squeeze(Actual, axis = 1)

# One prediction per window, and the windows end on the last rows of `data`,
# so the x-axis must span the last len(Predictions) dates rather than only
# the last `test_length` dates.
all_dates = data.index[-len(Predictions):]

fig = go.Figure()
fig.add_trace(go.Scatter(x = all_dates, y = Predictions, mode = 'lines', name = 'Predictions'))
fig.add_trace(go.Scatter(x = all_dates, y = Actual, mode = 'lines', name = 'Actual'))
fig.show()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


