<a href="https://colab.research.google.com/github/Sakshibisen11/Stock-Market-Analysis/blob/main/Stock_Market_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Performing technical analysis on past data to predict future stock prices

In [None]:
!pip install yfinance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Daily IBM price bars starting 2018-01-01.
# NOTE(review): no `end` is passed, so the last row presumably depends on the run date — confirm.
data = yf.download("IBM", start="2018-01-01", interval="1d")

[*********************100%***********************]  1 of 1 completed


In [None]:
# (rows, columns) of the downloaded frame
data.shape

(1187, 6)

In [None]:
# Peek at the first three rows
data.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,147.705551,148.001907,146.787766,147.466537,117.420235,4395815
2018-01-03,150.420654,152.782028,149.455063,151.520081,120.647858,9875914
2018-01-04,152.629059,155.181641,152.361374,154.588913,123.0914,7903785


About the features

---


**Open**: Price at which the first trade of the day takes place.

---


**Close**: Price at which the last trade of the day takes place.

---


**High**: Highest price at which the stock traded during the day.

---


**Low**: Lowest price at which the stock traded during the day.

---


**Adjusted close**: Closing price adjusted for corporate actions. One such action is a stock split, where each share is divided into a certain number of shares; this increases the number of shares and lowers the price per share. However, the total value of the shares held remains unchanged.

---


**Volume**: Number of shares traded in a day.

# UNDERSTANDING TRENDS WITHIN THE DATA

In [None]:

# Sort rows chronologically; rebinding (instead of inplace=True) keeps the cell idempotent and chainable
data = data.sort_index()


In [None]:
# Drop rows whose date already appeared earlier, keeping the first occurrence
is_repeat = data.index.duplicated(keep="first")
data = data.loc[~is_repeat]

In [None]:
# Peek at the last three rows
data.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-09-15,127.389999,127.470001,124.900002,125.489998,125.489998,5141700
2022-09-16,124.360001,127.529999,123.830002,127.269997,127.269997,9838600
2022-09-19,126.489998,128.050003,126.279999,127.620003,127.620003,2405211


In [None]:
# Count missing values per column
missing_per_column = data.isna().sum()
missing_per_column

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,1187.0,1187.0,1187.0,1187.0,1187.0,1187.0
mean,129.623639,130.783438,128.451664,129.61771,115.481172,5237877.0
std,11.130292,10.984839,11.229093,11.102922,11.123691,3242377.0
min,90.439774,93.441681,86.577438,90.602295,79.847,1257397.0
25%,120.578392,121.682602,119.488529,120.487572,108.593086,3468798.0
50%,130.602295,131.779999,129.600006,130.678772,113.931854,4389016.0
75%,137.159996,138.154869,136.128105,137.237091,124.823498,5814592.0
max,162.523895,163.604202,160.755264,161.682602,141.102219,39814420.0


In [None]:
import plotly.graph_objects as go

# Closing price over time, to eyeball the overall trend
close_trace = go.Scatter(x=data.index, y=data['Close'], mode='lines')
fig = go.Figure(data=[close_trace])
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Close',
    width=900,
    height=500,
)
fig.show()

In [None]:
# Traded volume over time, to eyeball the overall trend
volume_trace = go.Scatter(x=data.index, y=data['Volume'], mode='lines')
fig = go.Figure(data=[volume_trace])
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Volume',
    width=900,
    height=500,
)
fig.show()

Data Preprocessing

In [None]:
from sklearn.preprocessing import MinMaxScaler
import pickle
from tqdm.notebook import tnrange #track progress of preprocessing

In [None]:
# Keep only the columns the model uses. Open, High, Low, Close and Adj Close
# follow very similar patterns, hence only one of them (Close) is kept.
keep_columns = ['Close', 'Volume']
data = data[keep_columns]
data.head()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-02,147.466537,4395815
2018-01-03,151.520081,9875914
2018-01-04,154.588913,7903785
2018-01-05,155.344162,5434807
2018-01-08,156.281067,5478425


In [None]:
# Size of the test set: number of rows dated 2020-09-01 or later
in_test_period = data.index >= '2020-09-01'
test_length = data[in_test_period].shape[0]
test_length

516

In [None]:
#feature_length-number of datapoints the model will look at before predicting the next datapoint
def CreateFeatures_and_Targets(data,feature_length,target_column='Close'):
  """Slice a frame into sliding windows and the value that follows each window.

  Args:
    data: DataFrame whose rows are already in the order the windows should follow.
    feature_length: number of consecutive rows in each input window.
    target_column: column whose next value is predicted; defaults to 'Close'.

  Returns:
    (X, Y) as numpy arrays. X[i] holds rows i .. i+feature_length-1 of every
    column, and Y[i] is `target_column` at row i+feature_length, so there are
    len(data)-feature_length samples.
  """
  # Convert to numpy once instead of on every loop iteration
  values=data.values
  targets=data[target_column].values
  X=[] #features
  Y=[] #target
  for i in tnrange(len(data)-feature_length):
    X.append(values[i:i+feature_length,:])
    Y.append(targets[i+feature_length])
  return np.array(X),np.array(Y)


In [None]:
# Build 32-step windows; display only the shapes instead of dumping the whole 3D array
X,Y=CreateFeatures_and_Targets(data,32)
X.shape,Y.shape

  0%|          | 0/1155 [00:00<?, ?it/s]

array([[[1.47466537e+02, 4.39581500e+06],
        [1.51520081e+02, 9.87591400e+06],
        [1.54588913e+02, 7.90378500e+06],
        ...,
        [1.44120453e+02, 5.11598600e+06],
        [1.47954117e+02, 5.73103400e+06],
        [1.49149139e+02, 5.88228600e+06]],

       [[1.51520081e+02, 9.87591400e+06],
        [1.54588913e+02, 7.90378500e+06],
        [1.55344162e+02, 5.43480700e+06],
        ...,
        [1.47954117e+02, 5.73103400e+06],
        [1.49149139e+02, 5.88228600e+06],
        [1.49311661e+02, 4.47991300e+06]],

       [[1.54588913e+02, 7.90378500e+06],
        [1.55344162e+02, 5.43480700e+06],
        [1.56281067e+02, 5.47842500e+06],
        ...,
        [1.49149139e+02, 5.88228600e+06],
        [1.49311661e+02, 4.47991300e+06],
        [1.48336517e+02, 4.69612200e+06]],

       ...,

       [[1.32039993e+02, 4.14210000e+06],
        [1.31809998e+02, 4.40360000e+06],
        [1.32339996e+02, 3.08850000e+06],
        ...,
        [1.30660004e+02, 3.74110000e+06],
     

In [None]:
# Chronological split: the last `test_length` windows are held out for testing
Xtrain,Ytrain=X[:-test_length],Y[:-test_length]
Xtest,Ytest=X[-test_length:],Y[-test_length:]

In [None]:
# Shapes of the training features and targets
Xtrain.shape,Ytrain.shape


((639, 32, 2), (639,))

In [None]:
# Shapes of the test features and targets
Xtest.shape,Ytest.shape

((516, 32, 2), (516,))

In [None]:
 #Features are present in 3D whereas Scalers provided by sklearn support scaling in 2D
 #create a scaler to scale vectors with multiple dimensions
 #create customized fit_transform and transform methods
class MultiDimensionScaler():
  """Min-max scaler for 3D arrays, using one sklearn MinMaxScaler per index of the last axis.

  For each index i of the last axis, the 2D slice X[:, :, i] is handed to its
  own MinMaxScaler, which is stored in `self.scalers` in axis order.
  Inputs are never modified; both methods return a scaled copy.
  """
  def __init__(self):
    self.scalers=[]  # one fitted MinMaxScaler per index of the last axis

  @staticmethod
  def _float_copy(X):
    """Return a floating-point copy of X so the caller's array is left untouched."""
    X=np.array(X)  # np.array copies by default
    # Integer arrays would truncate the scaled values on assignment
    return X if np.issubdtype(X.dtype,np.floating) else X.astype(float)

  def fit_transform(self,X):
    """Fit a fresh scaler per last-axis index on X and return the scaled copy.

    Args:
      X: 3D array-like; scalers are fitted per index of axis 2.

    Returns:
      Scaled array with the same shape as X.
    """
    X=self._float_copy(X)
    # Start from scratch so a repeated fit does not leave stale scalers in front
    self.scalers=[]
    for i in range(X.shape[2]):
      Scaler=MinMaxScaler()
      X[:,:,i]=Scaler.fit_transform(X[:,:,i])
      self.scalers.append(Scaler)
    return X

  def transform(self,X):
    """Scale X with the scalers fitted by `fit_transform` and return the scaled copy.

    Args:
      X: 3D array-like with the same last-axis layout used for fitting.

    Returns:
      Scaled array with the same shape as X.
    """
    X=self._float_copy(X)
    for i in range(X.shape[2]):
      X[:,:,i]=self.scalers[i].transform(X[:,:,i])
    return X



In [None]:
# Fit the feature scalers on the training windows only, then reuse them on the test windows
Feature_Scaler=MultiDimensionScaler()
Xtrain=Feature_Scaler.fit_transform(Xtrain)
Xtest=Feature_Scaler.transform(Xtest)

In [None]:
# Shape of the training features after scaling
Xtrain.shape

(639, 32, 2)

In [None]:
# Scale the targets with a scaler fitted on the training targets only.
# The targets are 1D, so they are reshaped to a single column before scaling.
Target_Scaler=MinMaxScaler()
Ytrain=Target_Scaler.fit_transform(Ytrain.reshape(-1,1))
Ytest=Target_Scaler.transform(Ytest.reshape(-1,1))


In [None]:
#For realtime predictions
def save_object(obj,name:str):
  """Pickle `obj` into the file `<name>.pck`.

  Args:
    obj: any picklable object.
    name: output path without the `.pck` extension.
  """
  # The context manager closes the file even if pickling raises
  with open(f"{name}.pck","wb") as pickle_out:
    pickle.dump(obj,pickle_out)
def load_object(name:str):
  """Load and return the object pickled in the file `<name>.pck`.

  Args:
    name: input path without the `.pck` extension.

  Returns:
    The unpickled object.
  """
  # NOTE: unpickling can execute arbitrary code; only load files you created yourself.
  # The context manager makes sure the file handle is closed after reading.
  with open(f"{name}.pck","rb") as pickle_in:
    return pickle.load(pickle_in)

In [None]:
# Persist both fitted scalers so they can be reloaded for later predictions
for scaler_obj,file_stem in ((Feature_Scaler,"Feature_Scaler"),(Target_Scaler,"Target_Scaler")):
  save_object(scaler_obj,file_stem)

TRAINING MODEL WITH LSTM

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint,ReduceLROnPlateau

# Keep only the weights of the epoch with the lowest validation loss
save_best=ModelCheckpoint(
    "best_weight.h5",
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)
# Reduce the learning rate (factor 0.25, never below 0.00001) when the validation
# loss has not improved for 5 consecutive epochs (patience=5)
reduce_lr=ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.25,
    patience=5,
    min_lr=0.00001,
    verbose=1,
)

In [None]:
#Building the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,LSTM,Bidirectional

# Bidirectional LSTM -> LSTM -> two dense ELU layers with dropout -> single linear output
model=Sequential([
    Bidirectional(LSTM(512,return_sequences=True,recurrent_dropout=0.1,input_shape=(32,2))), #input_shape from X_train/test
    LSTM(256,recurrent_dropout=0.1),
    Dropout(0.3),
    Dense(64,activation='elu'),
    Dropout(0.3),
    Dense(32,activation="elu"),
    Dense(1,activation="linear"),
])



In [None]:
# Author's rationale: the pattern is complex and needs frequent weight updates, so the
# batch size is kept small, and SGD is preferred over Adam for small batch sizes.
optimizer=tf.keras.optimizers.SGD(learning_rate=0.002)
model.compile(optimizer=optimizer,loss='mse')


In [None]:
# Train one window per step (batch_size=1) without shuffling.
# NOTE(review): the test split is also passed as validation data, so anything driven by
# val_loss is tuned on the test set — consider a separate validation split.
history=model.fit(
    Xtrain,Ytrain,
    validation_data=(Xtest,Ytest),
    epochs=10,
    batch_size=1,
    shuffle=False,
    callbacks=[reduce_lr,save_best],
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Restore the weights stored in "best_weight.h5"
model.load_weights("best_weight.h5")

In [None]:
# Predict the (scaled) target for every test window
predictions=model.predict(Xtest)

In [None]:
# The model was trained on scaled targets, so its predictions are scaled as well;
# map both predictions and test targets back to the original scale
predictions=Target_Scaler.inverse_transform(predictions)
actual=Target_Scaler.inverse_transform(Ytest)

In [None]:
# Predictions still carry a trailing axis at this point
predictions.shape

(516, 1)

In [None]:
# Drop the trailing length-1 axis so both series are 1D
predictions=predictions.squeeze(axis=1)
actual=actual.squeeze(axis=1)

In [None]:
# Compare predicted and actual values over the test period
test_dates=data.index[-test_length:]
fig=go.Figure()
fig.add_trace(go.Scatter(x=test_dates,y=actual,mode='lines',name='Actual'))
fig.add_trace(go.Scatter(x=test_dates,y=predictions,mode='lines',name='Predicted'))

In [None]:
# Repeat the evaluation on the entire dataset: stack train and test back together in order
total_features=np.concatenate([Xtrain,Xtest])
total_targets=np.concatenate([Ytrain,Ytest])

In [None]:
# Predict the (scaled) target for every window, train and test
predictions=model.predict(total_features)

In [None]:
# Map predictions and targets back to the original scale
predictions=Target_Scaler.inverse_transform(predictions)
actual=Target_Scaler.inverse_transform(total_targets)

In [None]:
# Drop the trailing length-1 axis so both series are 1D
predictions=predictions.squeeze(axis=1)
actual=actual.squeeze(axis=1)

In [None]:
# Actual vs. predicted values over the whole dataset.
# The earliest rows only feed the first input window and have no target of their own,
# so there are fewer targets than dates; take the last len(actual) dates so every
# point is drawn against the date it belongs to.
plot_dates=data.index[-len(actual):]
fig=go.Figure()
fig.add_trace(go.Scatter(x=plot_dates,y=actual,mode='lines',name="Actual"))
fig.add_trace(go.Scatter(x=plot_dates,y=predictions,mode='lines',name="Predicted"))