<a href="https://colab.research.google.com/github/Seervichirag0/generative_ai/blob/main/Stock_Price_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Query the GPU attached to this runtime; the recorded run reported that the `nvidia-smi` command was not found.
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


In [None]:
!pip install yfinance



In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Daily GOOGL bars starting 2018-01-01; no `end` is passed, so the range is open-ended.
data = yf.download(
    'GOOGL',
    start='2018-01-01',
    interval='1d',
)

[*********************100%%**********************]  1 of 1 completed


In [None]:
# (rows, columns) of the downloaded frame.
data.shape

(1458, 6)

In [None]:
# Preview the first five rows of the downloaded frame.
data.head(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,52.651001,53.799,52.651001,53.6605,53.6605,31766000
2018-01-03,53.696499,54.805,53.671501,54.576,54.576,31318000
2018-01-04,54.8545,55.203999,54.713001,54.787998,54.787998,26052000
2018-01-05,55.172501,55.679001,55.09,55.5145,55.5145,30250000
2018-01-08,55.549999,55.958,55.5,55.710499,55.710499,24644000


In [None]:
# Order the rows by their index (the trading date).
data = data.sort_index()

In [None]:
# Drop rows whose index label repeats, keeping the first occurrence of each label.
data = data[~data.index.duplicated(keep='first')]

In [None]:
# Preview the last three rows (the most recent dates after sorting).
data.tail(n=3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-10-13,139.380005,140.0,136.619995,137.360001,137.360001,23420500
2023-10-16,138.169998,139.630005,137.990005,139.100006,139.100006,28501900
2023-10-17,138.630005,139.899994,137.179993,139.720001,139.720001,23500700


In [None]:
# Count the missing values in every column.
data.isna().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [None]:
import plotly.graph_objects as go

# Line chart of the closing price over time.
fig = go.Figure(
    data=[go.Scatter(x=data.index, y=data['Close'], mode='lines')],
)
fig.update_layout(
    height=500,
    width=900,
    xaxis_title='Date',
    yaxis_title='Close',
)

fig.show()

In [None]:
# Line chart of the traded volume over time.
fig = go.Figure(
    data=[go.Scatter(x=data.index, y=data['Volume'], mode='lines')],
)
fig.update_layout(
    height=500,
    width=900,
    xaxis_title='Date',
    yaxis_title='Volume',
)

fig.show()

In [None]:
from plotly.subplots import make_subplots

# 2x2 grid with one panel per price series, each drawn against the date index.
fig = make_subplots(rows=2, cols=2, column_widths=[0.5, 0.5])

# add_trace replaces the deprecated append_trace; naming each trace gives the
# legend meaningful entries instead of "trace 0..3".
fig.add_trace(go.Scatter(x=data.index, y=data['Open'], name='Open'), row=1, col=1)

fig.add_trace(go.Scatter(x=data.index, y=data['Close'], name='Close'), row=2, col=1)

fig.add_trace(go.Scatter(x=data.index, y=data['High'], name='High'), row=1, col=2)

fig.add_trace(go.Scatter(x=data.index, y=data['Low'], name='Low'), row=2, col=2)

# The x axis of every panel carries the dates (x=data.index) ...
fig.update_xaxes(title_text="Date", row=1, col=1)
fig.update_xaxes(title_text="Date", row=1, col=2)
fig.update_xaxes(title_text="Date", row=2, col=1)
fig.update_xaxes(title_text="Date", row=2, col=2)

# ... and the y axis carries the plotted price series.
fig.update_yaxes(title_text="Open", row=1, col=1)
fig.update_yaxes(title_text="High", row=1, col=2)
fig.update_yaxes(title_text="Close", row=2, col=1)
fig.update_yaxes(title_text="Low", row=2, col=2)

fig.update_layout(title_text="Subplots")
fig.show()

# Preprocessing the data for the model


In [None]:
from sklearn.preprocessing import MinMaxScaler
import pickle
from tqdm.notebook import tnrange

In [None]:
# Open, High, Low and Close show very similar patterns, so keep only Close plus Volume.
data = data.loc[:, ['Close', 'Volume']]
data.head(n=5)

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-02,53.6605,31766000
2018-01-03,54.576,31318000
2018-01-04,54.787998,26052000
2018-01-05,55.5145,30250000
2018-01-08,55.710499,24644000


In [None]:
# Size of the test set: the number of rows dated 2023-05-01 or later.
test_length = int((data.index >= '2023-05-01').sum())

In [None]:
def Create_features_and_targets(data,feature_length):
  """Build sliding-window features and next-row targets from a price frame.

  Window ``i`` holds rows ``i`` .. ``i + feature_length - 1`` of ``data``
  (every column); its target is the ``'Close'`` value of row
  ``i + feature_length``.

  The windows are gathered with one vectorised NumPy index operation instead
  of a per-row ``iloc`` loop, so no progress bar is shown.

  Args:
    data: pandas DataFrame that contains a ``'Close'`` column.
    feature_length: number of consecutive rows per window (must be >= 0).

  Returns:
    Tuple ``(X, Y)``. ``X`` has shape
    ``(n_windows, feature_length, n_columns)`` and ``Y`` holds ``n_windows``
    targets, where ``n_windows = max(len(data) - feature_length, 0)``. Both
    are fresh arrays that do not alias ``data``. When there are no windows,
    ``X`` still has three dimensions, so shape-based code downstream keeps
    working.

  Raises:
    ValueError: if ``feature_length`` is negative.
  """
  if feature_length < 0:
    raise ValueError("feature_length must be non-negative")

  # Convert once up front rather than on every window.
  values = data.to_numpy()
  close = data['Close'].to_numpy()
  n_windows = max(len(data) - feature_length, 0)

  # row_index[i, j] == i + j: the row of `data` that fills slot j of window i.
  row_index = np.arange(n_windows)[:, None] + np.arange(feature_length)[None, :]

  X = values[row_index]               # integer-array indexing returns a new array
  Y = close[feature_length:].copy()   # target of window i is Close at row i + feature_length

  return X,Y

In [None]:
# Build the feature windows (32 consecutive rows each) and their targets.
X, Y = Create_features_and_targets(data, feature_length=32)

  0%|          | 0/1426 [00:00<?, ?it/s]

In [None]:
# Shapes of the feature windows and of their targets.
X.shape, Y.shape

((1426, 32, 2), (1426,))

In [None]:
# Hold out the last `test_length` windows as the test set. Slicing at an explicit
# split index (instead of `[:-test_length]`) stays correct when test_length is 0,
# where `X[:-0]` would leave the training set empty.
split_at = max(len(X) - test_length, 0)
X_train, X_test = X[:split_at], X[split_at:]
Y_train, Y_test = Y[:split_at], Y[split_at:]

In [None]:
# Shapes of the training features and targets.
X_train.shape, Y_train.shape

((1308, 32, 2), (1308,))

In [None]:
# Shapes of the test features and targets.
X_test.shape, Y_test.shape

((118, 32, 2), (118,))

In [None]:
#Create a scaler to scale vectors with multiple dimensions
class MultiDimensionScaler():
  """Scale a 3-D array separately for every index of its last axis.

  One scaler is fitted per index ``i`` of the last axis, on the 2-D slice
  ``X[:, :, i]``. The fitted scalers are kept in ``self.scalers`` (in last-axis
  order) so other arrays can later be scaled with the same parameters.

  Both methods return a new floating-point array and leave the input
  untouched, so re-running a cell cannot scale the same data twice and
  integer inputs are not truncated when the scaled values are written back.

  Args:
    scaler_factory: optional zero-argument callable returning a fresh scaler
      object that exposes ``fit_transform`` and ``transform``. When omitted,
      ``MinMaxScaler`` is used.
  """

  def __init__(self, scaler_factory=None):
    self.scaler_factory = scaler_factory
    self.scalers = []       # fitted scalers, one per index of the last axis

  @staticmethod
  def _float_copy(X):
    """Return a 3-D floating-point copy of ``X`` (ValueError if not 3-D)."""
    X = np.asarray(X)
    if X.ndim != 3:
      raise ValueError(f"expected a 3-D array, got {X.ndim} dimension(s)")
    # Keep an existing float dtype; promote everything else to float64.
    dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else np.float64
    return X.astype(dtype, copy=True)

  def fit_transform(self, X):
    """Fit one new scaler per last-axis index on ``X`` and return the scaled copy.

    Scalers from a previous fit are discarded, so ``self.scalers`` always
    describes the most recent fit only.
    """
    scaled = self._float_copy(X)
    make_scaler = self.scaler_factory or MinMaxScaler
    fitted = []
    for i in range(scaled.shape[2]):
      scaler = make_scaler()
      scaled[:,:,i] = scaler.fit_transform(scaled[:,:,i])
      fitted.append(scaler)
    self.scalers = fitted
    return scaled

  def transform(self, X):
    """Return a copy of ``X`` scaled with the already-fitted scalers.

    Raises:
      ValueError: if the size of the last axis of ``X`` differs from the
        number of fitted scalers (including the not-yet-fitted case).
    """
    scaled = self._float_copy(X)
    if scaled.shape[2] != len(self.scalers):
      raise ValueError(
          f"X has {scaled.shape[2]} feature dimension(s) but "
          f"{len(self.scalers)} scaler(s) are fitted")
    for i, scaler in enumerate(self.scalers):
      scaled[:,:,i] = scaler.transform(scaled[:,:,i])
    return scaled

In [None]:
Feature_scaler = MultiDimensionScaler()
# Fit the per-feature scalers on the training windows only ...
X_train = Feature_scaler.fit_transform(X_train)
# ... and reuse those fitted scalers for the test windows. Re-fitting on the test
# set would scale it by its own min/max, leaking test statistics and putting the
# two splits on different scales.
X_test = Feature_scaler.transform(X_test)

In [None]:
Target_scaler = MinMaxScaler()
# Fit the target scaler on the training targets only.
Y_train = Target_scaler.fit_transform(Y_train.reshape(-1,1))
# Apply the same fitted scaling to the test targets. Calling fit_transform here
# would overwrite the training fit, so a later inverse_transform of training-scale
# values would be mapped back with the test set's min/max.
Y_test = Target_scaler.transform(Y_test.reshape(-1,1))

In [None]:
def save_object(obj, name:str):
  """Pickle ``obj`` into the file ``{name}.pck``.

  Args:
    obj: any picklable Python object.
    name: path of the target file without the ``.pck`` extension.
  """
  # The context manager closes the file even when pickle.dump raises.
  with open(f"{name}.pck","wb") as pickle_out:
    pickle.dump(obj, pickle_out)

def load_object(name:str):
  """Load and return the object pickled in the file ``{name}.pck``.

  Args:
    name: path of the source file without the ``.pck`` extension.

  Returns:
    The unpickled object.
  """
  # NOTE: unpickling can execute arbitrary code; only load files you trust.
  # The context manager closes the file handle, which was previously left open.
  with open(f"{name}.pck","rb") as pickle_in:
    return pickle.load(pickle_in)

Creating an LSTM model

In [None]:
#Define callback for our model
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# Checkpoint only the weights, and only when val_loss improves on the best value so far.
save_best = ModelCheckpoint(
    "best_weight.h5",
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)
# After 5 epochs without val_loss improvement, multiply the learning rate by 0.25
# (i.e. divide it by 4), never going below 1e-5.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.25,
    patience=5,
    min_lr=0.00001,
    verbose=1,
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense, Bidirectional

model = Sequential()

# The input shape is declared on the Bidirectional wrapper, which is the layer
# actually added to the Sequential model; given to the wrapped LSTM it is not
# picked up by the model. It is read from the training windows,
# (timesteps, features), so it always matches the data being fitted.
model.add(Bidirectional(LSTM(512, return_sequences=True, recurrent_dropout=0.1),
                        input_shape=X_train.shape[1:]))
model.add(LSTM(256, recurrent_dropout=0.1))  # dropout applied to the recurrent state
model.add(Dropout(0.3))
model.add(Dense(64, activation='elu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='elu'))
model.add(Dense(1, activation='linear'))  # single linear output: the scaled Close value


In [None]:
# Plain SGD with an initial learning rate of 0.002; the loss is mean squared error.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.002)
model.compile(optimizer=optimizer, loss='mse')

In [None]:
from sklearn.utils import validation
# Train for up to 10 epochs, one window per batch, keeping chronological order (no shuffling).
# NOTE(review): the test split is passed as validation data, so the val_loss-driven
# callbacks pick the checkpoint and learning rate using the test set — confirm this is intended.
history = model.fit(
    X_train,
    Y_train,
    epochs=10,
    batch_size=1,
    verbose=1,
    shuffle=False,
    validation_data=(X_test, Y_test),
    callbacks=[reduce_lr, save_best],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10

KeyboardInterrupt: ignored

In [None]:
# Restore the weights from "best_weight.h5", the file the ModelCheckpoint callback
# writes whenever val_loss improves.
model.load_weights("best_weight.h5")

In [None]:
# Predict the (scaled) target for every test window.
predictions = model.predict(X_test)

In [None]:
# Map the scaled predictions and the scaled test targets back through Target_scaler.
predictions = Target_scaler.inverse_transform(predictions)
Actual = Target_scaler.inverse_transform(Y_test)

In [None]:
# Shape of the prediction array before the trailing axis is squeezed away.
predictions.shape

In [None]:
# Drop the length-1 second axis so both series are one-dimensional.
predictions = predictions.squeeze(axis=1)
Actual = Actual.squeeze(axis=1)

In [None]:
# Display the inverse-transformed test-set predictions.
predictions

In [None]:
#Check the predictions vs Actual on the test split
# Each target is the Close of the row that follows its window, and the test split
# is the tail of the targets, so these series belong to the last len(Actual) rows
# of `data`. Pairing them with the full index would draw them on the wrong dates.
plot_dates = data.index[len(data) - len(Actual):]

fig = go.Figure()

fig.add_trace(go.Scatter(x=plot_dates,y = Actual,mode='lines',name='Actual'))
fig.add_trace(go.Scatter(x=plot_dates,y = predictions,mode='lines',name='Predicted'))
fig.show()

In [None]:
# Stack the scaled training and test windows back together along the sample axis.
total_features = np.concatenate([X_train, X_test], axis=0)

In [None]:
# Stack the scaled training and test targets in the same order as the features.
total_targets = np.concatenate([Y_train, Y_test], axis=0)

In [None]:
# Predict the (scaled) target for every window, training and test alike.
predictions = model.predict(total_features)

In [None]:
# Map the scaled predictions and the scaled targets back through Target_scaler.
predictions = Target_scaler.inverse_transform(predictions)
Actual = Target_scaler.inverse_transform(total_targets)

In [None]:
# Drop the length-1 second axis so both series are one-dimensional.
predictions = predictions.squeeze(axis=1)
Actual = Actual.squeeze(axis=1)

In [None]:
# Shape of the squeezed prediction array covering all windows.
predictions.shape

In [None]:
#Check the predictions vs Actual over the training and test windows together
# The first rows of `data` only ever serve as window inputs and have no target, so
# the series are shorter than the index. Align them with the last len(Actual) dates
# instead of the full index, whose length does not match.
plot_dates = data.index[len(data) - len(Actual):]

fig = go.Figure()

fig.add_trace(go.Scatter(x=plot_dates,y = Actual,mode='lines',name='Actual'))
fig.add_trace(go.Scatter(x=plot_dates,y = predictions,mode='lines',name='Predicted'))
fig.show()