In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import seaborn as sn
import matplotlib.pyplot as plt
from tensorflow import keras
import tensorflow as tf
from IPython.display import clear_output
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense,Dropout, InputLayer, Activation, concatenate
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go

# adding path so as to import created .py file
import sys
sys.path.append('../') 

# importing a function to visualize live training result 
from plotlearning import PlotLearning       

2024-03-10 19:59:08.620942: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-10 19:59:08.654569: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


For this notebook we are gonna predict SBIN's stock price 

In [2]:
# data=yf.download("SBIN.NS")
data = pd.read_csv('sbin.csv')

In [3]:
data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1996-01-01,18.691147,18.978922,18.540184,18.823240,12.409932,43733533
1,1996-01-02,18.894005,18.964767,17.738192,18.224106,12.014930,56167280
2,1996-01-03,18.327892,18.568489,17.643839,17.738192,11.694572,68296318
3,1996-01-04,17.502312,17.832542,17.223972,17.676863,11.654140,86073880
4,1996-01-05,17.738192,17.785366,17.459852,17.577793,11.588824,76613039
...,...,...,...,...,...,...,...
7078,2024-03-01,752.000000,772.000000,751.950012,769.150024,769.150024,17348526
7079,2024-03-04,774.400024,777.000000,769.000000,772.049988,772.049988,9789141
7080,2024-03-05,769.500000,786.950012,769.099976,783.950012,783.950012,18088847
7081,2024-03-06,783.650024,790.299988,772.900024,783.900024,783.900024,26710525


For this notebook we will predict Closing price of 'Google' stocks 

In [4]:
# data.to_csv("sbin.csv",)

In [5]:
df=data[['Date','Close']].copy()

In [6]:
df.head()

Unnamed: 0,Date,Close
0,1996-01-01,18.82324
1,1996-01-02,18.224106
2,1996-01-03,17.738192
3,1996-01-04,17.676863
4,1996-01-05,17.577793


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7083 entries, 0 to 7082
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    7083 non-null   object 
 1   Close   7083 non-null   float64
dtypes: float64(1), object(1)
memory usage: 110.8+ KB


In [8]:
# converting object data type to date time format
df['Date'] = df['Date'].astype('datetime64[ns]',)

Let's see how stock price went over past days

In [9]:
fig=go.Figure()
fig.add_scatter(x=df['Date'],y=df['Close'])
fig.update_layout(template="plotly_dark",
                  hovermode='x unified',
                  xaxis_title="Date",
                  yaxis_title="Close",
                  title="Google's Stock Price")


google's stock price is increasing, how ever a downfall can be seen after the end of year 2021<br>
and its increased after the year 2022 ends  

In [10]:
# Scaling our data as LSTM needs data to be normalizied
le = MinMaxScaler()

scaled_close_price = le.fit_transform(df.iloc[:,1:])

In [11]:
scaled_close_price

array([[0.00706998],
       [0.0062966 ],
       [0.00566938],
       ...,
       [0.99470769],
       [0.99464316],
       [1.        ]])

In [12]:
scaled_close_price.shape

(7083, 1)

In [13]:
# defining how many past days model will look and will predict future
backcandles=30      # choosing 30 days 

We need to prepare our data so as <br>
model will train on past 30 days and will predict the future as y 

In [14]:
# reparing the data

X=np.array([scaled_close_price[i:i+backcandles] for i in range(len(scaled_close_price)-backcandles)])

In [15]:
# here is my target column
Y=scaled_close_price[backcandles:,]

In [16]:
X.shape,Y.shape

((7053, 30, 1), (7053, 1))

In [17]:
# we will split 90% of our data as training set, 5% will be validation set and rest will be test set
x_train = X[:int(X.shape[0]*.8)]
y_train = Y[:int(X.shape[0]*.8)]
x_val = X[int(X.shape[0]*.8):int(X.shape[0]*.9)]
y_val = Y[int(X.shape[0]*.8):int(X.shape[0]*.9)]
x_test = X[int(X.shape[0]*.9):]
y_test = Y[int(X.shape[0]*.9):]

In [18]:
x_train.shape,y_train.shape,x_test.shape,y_test.shape,x_val.shape,y_val.shape

((5642, 30, 1), (5642, 1), (706, 30, 1), (706, 1), (705, 30, 1), (705, 1))

In [19]:
# Already saved model for you, so you dont have to
# model = keras.models.load_model("./saved_model")

In [20]:
# defifing LSTM model

model = Sequential()
model.add(LSTM(256,input_shape=(backcandles,x_train.shape[-1]),return_sequences=False))
# model.add(LSTM(32,return_sequences=False))
# model.add(LSTM(16,return_sequences=False))
model.add(Dropout(.1))
model.add(Dense(1,'linear'))


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [21]:
model.compile(optimizer=Adam(learning_rate=0.0001),loss='mse',metrics=['RootMeanSquaredError'])

In [22]:
model.summary()

In [None]:
history = model.fit(x=x_train,y=y_train,
                    validation_data=(x_val,y_val),
                    epochs = 300, batch_size = 64,
                    verbose=0,
                    callbacks=[EarlyStopping(monitor='val_loss', patience=30,verbose=0),PlotLearning()])

In [None]:
# predictions and errors
y_predicted=model.predict(x_test)
print("RMSE : ",np.mean(np.square(y_test-y_predicted)))

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
RMSE :  0.000232349495355328


In [None]:
# the dates availabe in test set
test_dates = df['Date'][-x_test.shape[0]-1:-1]

In [None]:

fig=go.Figure()
fig.add_scatter(x=test_dates,y=le.inverse_transform(y_predicted).ravel(),
                marker=dict(color="#00ffff"),
                name="predictions")
fig.add_scatter(x=test_dates,y=le.inverse_transform(y_test).ravel(),
                marker=dict(color="#ff6200"),
                name="Actual")
fig.update_layout(template="plotly_dark",
                  hovermode='x unified',
                  xaxis_title="Date",
                  yaxis_title="Close",
                  title="A quick comparision")


In [None]:
model.save('300+*_full_1.keras')

In [None]:
kamal

In [None]:
# previously we are fedding models actual past value to predict the nextday value
# but what if we give model's predicte value as past value

# Creating a data frame with predited values


updated_x = x_val[-1:]

for i in range(len(x_test)):
    
    updated_x=np.append(updated_x,[model.predict(updated_x[:,-backcandles:],verbose=0)],axis=1)
    


In [None]:

fig=go.Figure()
fig.add_scatter(x=test_dates,y=le.inverse_transform(updated_x[0][backcandles:]).ravel(),
                marker=dict(color="#00ffff"),
                name="predictions")
fig.add_scatter(x=test_dates,y=le.inverse_transform(y_test).ravel(),
                marker=dict(color="#ff6200"),
                name="Actual")
fig.update_layout(template="plotly_dark",
                  hovermode='x unified',
                  xaxis_title="Date",
                  yaxis_title="Close",
                  title="predictions if past values are unknown")


For a longer run it shows price will decrease but in reaity it should be increasing

So our model perfoms good it we want to predict only tomorrow's Closing price, <br>
But if we want to forcast for a longer peiod of time like in this case we did for 238 days , then its bad <br>
upto 10 days it is giving a suitable result


In [None]:
# dumping saler

import joblib

joblib.dump(le, "scaler.pkl")


['scaler.pkl']