<a href="https://colab.research.google.com/github/Rajnandini17/Deep-Learning-projects/blob/main/Stock_Market_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
!pip install yfinance



In [21]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
import pickle
from tqdm.notebook import tnrange

In [10]:
data = yf.download("GOOGL", start = "2020-01-01", interval = '1d')

[*********************100%%**********************]  1 of 1 completed


In [11]:
data.shape

(937, 6)

In [12]:
data.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,67.420502,68.433998,67.324501,68.433998,68.433998,27278000
2020-01-03,67.400002,68.6875,67.365997,68.075996,68.075996,23408000
2020-01-06,67.581497,69.916,67.550003,69.890503,69.890503,46768000
2020-01-07,70.023003,70.175003,69.578003,69.755501,69.755501,34330000
2020-01-08,69.740997,70.592499,69.6315,70.251999,70.251999,35314000


In [13]:
#sorting data points based on index
data.sort_index(inplace = True)

In [14]:
#removing duplicates
data = data.loc[~data.index.duplicated(keep='first')]

In [15]:
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-09-15,137.979996,138.520004,136.479996,137.399994,137.399994,38908400
2023-09-18,136.610001,139.160004,136.610001,138.210007,138.210007,21861300
2023-09-19,137.419998,138.410004,136.619995,138.039993,138.039993,20353700
2023-09-20,138.080002,138.080002,133.619995,133.740005,133.740005,29879000
2023-09-21,131.440002,132.154999,130.070007,131.240005,131.240005,15604662


In [16]:
#checking for missing values
data.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [17]:
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,937.0,937.0,937.0,937.0,937.0,937.0
mean,105.887115,107.166263,104.676736,105.944736,105.944736,34905060.0
std,25.000466,25.108378,24.79542,24.931061,24.931061,15193200.0
min,52.818501,53.345501,50.443501,52.706501,52.706501,9312000.0
25%,87.011002,88.249001,86.32,87.031998,87.031998,25154000.0
50%,105.772003,107.257004,104.571503,105.970001,105.970001,31045400.0
75%,126.25,128.212494,125.360001,126.730003,126.730003,40005100.0
max,151.25,151.546494,148.899002,149.838501,149.838501,123200000.0


In [19]:
#checking trend in closing values
fig = go.Figure()
fig.add_trace(go.Scatter(x = data.index, y = data['Close'], mode = 'lines'))
fig.update_layout(height = 500, width = 900, xaxis_title='Date', yaxis_title = 'Close')
fig.show()

In [20]:
#Checking trend in Volume Traded
fig = go.Figure()
fig.add_trace(go.Scatter(x = data.index , y = data['Volume'] , mode = 'lines'))
fig.update_layout(height = 500 , width = 900, xaxis_title='Date' , yaxis_title='Volume')
fig.show()

##DATA PREPARATION

In [22]:
#filtering only required data
data = data[['Close', 'Volume']]
data.head(3)

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-02,68.433998,27278000
2020-01-03,68.075996,23408000
2020-01-06,69.890503,46768000


In [23]:
#confirm the testing set length
test_length = data[(data.index >= '2021-09-01')].shape[0]

In [24]:
def CreateFeatures_and_Targets(data, feature_length):
  X = []
  Y = []
  for i in tnrange(len(data) - feature_length):
    X.append(data.iloc[i : i + feature_length, :].values)
    Y.append(data["Close"].values[i+feature_length])

  X = np.array(X)
  Y = np.array(Y)
  return X, Y

In [25]:
X, Y = CreateFeatures_and_Targets(data, 32)

  0%|          | 0/905 [00:00<?, ?it/s]

In [26]:
#check the shapes
X.shape, Y.shape

((905, 32, 2), (905,))

In [27]:
#splitting data into train and test
X_train, Y_train, X_test, Y_test = X[:-test_length], Y[:-test_length], X[-test_length:], Y[-test_length:]

In [28]:
X_train.shape, Y_train.shape

((388, 32, 2), (388,))

In [29]:
X_test.shape, Y_test.shape

((517, 32, 2), (517,))

In [30]:
# Create a Scaler to Scale Vectors with Multiple Dimensions
class MultiDimensionScaler():
    def __init__(self):
        self.scalers = []

    def fit_transform(self , X):
        total_dims = X.shape[2]
        for i in range(total_dims):
            Scaler = MinMaxScaler()
            X[:, :, i] = Scaler.fit_transform(X[:,:,i])
            self.scalers.append(Scaler)
        return X

    def transform(self , X):
        for i in range(X.shape[2]):
            X[:, :, i] = self.scalers[i].transform(X[:,:,i])
        return X

In [31]:
Feature_Scaler = MultiDimensionScaler()
X_train = Feature_Scaler.fit_transform(X_train)
X_test = Feature_Scaler.transform(X_test)

In [32]:
Target_Scaler = MinMaxScaler()
Y_train = Target_Scaler.fit_transform(Y_train.reshape(-1,1))
Y_test = Target_Scaler.transform(Y_test.reshape(-1,1))

In [33]:
def save_object(obj , name : str):
    pickle_out = open(f"{name}.pck","wb")
    pickle.dump(obj, pickle_out)
    pickle_out.close()

def load_object(name : str):
    pickle_in = open(f"{name}.pck","rb")
    data = pickle.load(pickle_in)
    return data

In [34]:
#Saving objects for future purposes
save_object(Feature_Scaler , "Feature_Scaler")
save_object(Target_Scaler , "Target_Scaler")

##BUILDING THE MODEL

In [35]:
from tensorflow.keras.callbacks import ModelCheckpoint , ReduceLROnPlateau

save_best = ModelCheckpoint("best_weights.h5", monitor='val_loss', save_best_only=True, save_weights_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.25,patience=5, min_lr=0.00001,verbose = 1)

In [36]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Dropout , LSTM , Bidirectional , BatchNormalization

model = Sequential()

model.add(Bidirectional(LSTM(512 ,return_sequences=True , recurrent_dropout=0.1, input_shape=(32, 3))))
model.add(LSTM(256 ,recurrent_dropout=0.1))
model.add(Dropout(0.3))
model.add(Dense(64 , activation='elu'))
model.add(Dropout(0.3))
model.add(Dense(32 , activation='elu'))
model.add(Dense(1 , activation='linear'))



In [37]:
optimizer = tf.keras.optimizers.SGD(learning_rate = 0.002)
model.compile(loss='mse', optimizer=optimizer)

In [38]:
history = model.fit(X_train, Y_train,
            epochs=10,
            batch_size = 1,
            verbose=1,
            shuffle=False ,
            validation_data=(X_test , Y_test),
            callbacks=[reduce_lr , save_best])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


##VISUALIZING THE PREDICTIONS ON TEST DATA

In [39]:
 #Load the best weights
model.load_weights("best_weights.h5")

In [40]:
Predictions = model.predict(X_test)



In [41]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Y_test)

In [42]:
Predictions.shape

(517, 1)

In [43]:
Predictions = np.squeeze(Predictions , axis = 1)
Actual = np.squeeze(Actual , axis = 1)

In [44]:
# Check the Predictions vs Actual
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index[-test_length:] , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = data.index[-test_length:] , y = Predictions , mode = 'lines' , name='Predicted'))
fig.show()

##VISUALIZING THE PREDICTIONS ON WHOLE DATA

In [45]:
Total_features = np.concatenate((X_train , X_test) , axis = 0)

In [46]:
Total_Targets = np.concatenate((Y_train , Y_test) , axis = 0)

In [47]:
Predictions = model.predict(Total_features)



In [48]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Total_Targets)

In [49]:
Predictions = np.squeeze(Predictions , axis = 1)
Actual = np.squeeze(Actual , axis = 1)

In [50]:
# Check the trend in Volume Traded
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = data.index , y = Predictions , mode = 'lines' , name='Predicted'))
fig.show()

In [51]:
# Save and Load the whole model
model.save("Model.h5")
loaded_model = tf.keras.models.load_model("Model.h5")


You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.



##REALTIME PREDICTION

In [52]:
def PredictStockPrice(Model , DataFrame , PreviousDate , feature_length = 32):
    idx_location = DataFrame.index.get_loc(PreviousDate)
    Features = DataFrame.iloc[idx_location - feature_length : idx_location,:].values
    Features = np.expand_dims(Features , axis = 0)
    Features = Feature_Scaler.transform(Features)
    Prediction = Model.predict(Features)
    Prediction = Target_Scaler.inverse_transform(Prediction)
    return Prediction[0][0]

In [53]:
PredictStockPrice(loaded_model , data , '2021-01-14')



102.62556