<a href="https://colab.research.google.com/github/Ajay-A3933/ajay/blob/master/stock_price_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
## read data
## do exploratory data analysis [eda]
## split the data to training and testing
## create a model -  rnn, lstm, gru
## model evaluation
##

In [2]:
#time series data, where google stock price prediction for the next three months [time series]
# sequence - RNN
# Run the code shared in chat window

## modify the n_days in code block [7] between 10, 500
## re run the entire code - after 50th epoch u get the loss value, share me the loss value
##

In [3]:
import pandas as pd
data_link = 'https://docs.google.com/spreadsheets/d/1Sqf4OdzWkm_BkfnSw2YAMF_J7vEApKu3uJosyqNgPrE/edit#gid=0'
url_1 = data_link.replace('edit#gid=','export?format=csv&gid=')
df = pd.read_csv(url_1)
df.head()
## incase if the above reading of file does not work than uncomment the lines
# df = pd.read_csv('/content/Google Stock - Sheet1 (3).csv')
# df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,12/17/2019 16:00:00,68.14,68.25,67.57,67.76,1855259
1,12/18/2019 16:00:00,67.83,68.02,67.55,67.63,1656695
2,12/19/2019 16:00:00,67.59,67.91,67.45,67.8,1470112
3,12/20/2019 16:00:00,68.17,68.18,67.45,67.48,3316905
4,12/23/2019 16:00:00,67.79,67.99,67.33,67.44,883200


In [4]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout,GRU,Bidirectional, SimpleRNN
from tensorflow.keras.optimizers import SGD
import math
from sklearn.metrics import mean_squared_error

In [5]:
# converting dates
df.Date = pd.to_datetime(df.Date)
df.Date = [date.strftime(format='%Y/%m/%d') for date in df.Date]
df.Date = pd.to_datetime(df.Date)
df= df.set_index('Date')
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-12-17,68.14,68.25,67.57,67.76,1855259
2019-12-18,67.83,68.02,67.55,67.63,1656695
2019-12-19,67.59,67.91,67.45,67.8,1470112
2019-12-20,68.17,68.18,67.45,67.48,3316905
2019-12-23,67.79,67.99,67.33,67.44,883200


In [6]:
# How do we split the data in to training and testing in this case?
training_set = df[df.index.year<2023].Close
test_set = df[df.index.year>=2023].Close

In [7]:
# eda using plotly
import plotly.express as px
import plotly.graph_objects as go
temp=df.copy()
temp['Date']=df.index
fig = go.Figure()
for year in range(int(df.index.min().strftime("%Y")),int(df.index.max().strftime("%Y"))+1):
    sample = temp[temp.Date.dt.year==year] # slicing by specific year
    fig.add_traces (go.Candlestick(x=sample.index,
                open=sample.Open,
                high=sample.High,
                low=sample.Low,
                close=sample.Close,
                visible=True,showlegend=False
                                  )
                   )
# Create menu items
items=[]
plo='Candlestick plot for '
all_plot=[True for i in range(int(df.index.min().strftime("%Y")),int(df.index.max().strftime("%Y"))+1)]
items.append(dict(label = 'All',
                    method = 'update',
                    args = [{'visible':all_plot },
                            {'title': plo+'complete data',
                            'showlegend':False}]))
ind=-1
for year in range(int(df.index.min().strftime("%Y")),int(df.index.max().strftime("%Y"))+1):
    visible_plot=[False for i in range(int(df.index.min().strftime("%Y")),int(df.index.max().strftime("%Y"))+1)]
    ind+=1
    visible_plot[ind]=True
    items.append(
            dict(label = str(year),
                    method = 'update',
                    args = [{'visible':visible_plot },
                            {'title': plo+str(year),
                            'showlegend':False}])
                )
fig.update_layout(dragmode=False,showlegend=False,template='plotly_dark',
        updatemenus=[go.layout.Updatemenu(
            active=0,
            buttons=items,
                pad={"r": 10, "t": 2},
                showactive=True,
                x=0,
                xanchor="right",
                y=1.15,
                yanchor="top"
            )
        ]
        )

fig.show()

In [8]:
## scaling the training set
sc = MinMaxScaler(feature_range=(0,1))
training_set_scaled = sc.fit_transform(np.array(training_set).reshape(-1,1))
## Data Preparation
### a sequence of 1460 training, i cant pass all the data at once, Back Propogation Through time [BPTT],

def picker(series,pos,days):
  return(list(series[pos:pos+days]),series[pos+days],np.mean(np.array(series[pos:pos+days])))

def prepare_data(series,days):
  pos =0
  X,y =[],[]
  while pos!=len(series)-days:
    X_entry,y_entry,mean = picker(series,pos,days)
    X_entry = np.array(X_entry)
    X.append(X_entry)
    y.append(y_entry)
    pos+=1
  return(np.array(X),np.array(y))


In [9]:
# list_a = [1,2,3,4,5,6,7,8,9,10]
# pos_test = 0
# while pos_test!=len(list_a)-3:
#   days_test = 3
#   print(pos_test)
#   print("x_data",list_a[pos_test:pos_test+days_test])
#   print("y_data", list_a[pos_test+days_test])
#   pos_test+=1


##

In [10]:
n_days = 160 # change this value
X_train,y_train = prepare_data(training_set_scaled,n_days)
# Reshaping the data
X_train = np.reshape(X_train,(X_train.shape[0],X_train.shape[1],1))
# test data


In [11]:
df_total = df.copy()
inputs = df_total[len(df_total)-len(test_set)-30:].values
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs)

In [12]:
regressorRNN = Sequential()
regressorRNN.add(SimpleRNN(32,
                           return_sequences=True,
                           input_shape=(X_train.shape[1],1),
                           activation='tanh'))
regressorRNN.add(Dropout(0.05))
regressorRNN.add(SimpleRNN(32,
                           return_sequences=True,
                           input_shape=(X_train.shape[1],1),
                           activation='tanh'))
regressorRNN.add(Dropout(0.05))
regressorRNN.add(SimpleRNN(32,
                           return_sequences=True,
                           input_shape=(X_train.shape[1],1),
                           activation='tanh'))
regressorRNN.add(Dropout(0.05))
regressorRNN.add(Dense(units=1))
regressorRNN.summary()


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [15]:
# compile the model
import tensorflow as tf
regressorRNN.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),loss='mean_squared_error')
regressorRNN.fit(X_train,y_train,epochs=50,batch_size=16)

Epoch 1/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 97ms/step - loss: 0.3502
Epoch 2/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.1706
Epoch 3/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.1516
Epoch 4/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.1403
Epoch 5/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.1307
Epoch 6/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.1245
Epoch 7/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.1233
Epoch 8/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.1204
Epoch 9/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.1179
Epoch 10/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 0.1148

<keras.src.callbacks.history.History at 0x7d9228135650>

In [19]:
# Combine the last n_days of the training set and the test set using pd.concat
combined_series = pd.concat([training_set[-n_days:], test_set])

# Prepare the test data using the combined series
X_test,y_test = prepare_data(sc.transform(np.array(combined_series).reshape(-1,1)),n_days)

In [20]:
simple_rnn_predicted_price = regressorRNN.predict(X_test.reshape(X_test.shape[0],X_test.shape[1],1))

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step


In [41]:
simple_rnn_predicted_price


array([[[0.13593741],
        [0.1397513 ],
        [0.8422058 ],
        ...,
        [0.5167296 ],
        [0.5543143 ],
        [0.5181892 ]],

       [[0.1355026 ],
        [0.14097601],
        [0.82469755],
        ...,
        [0.5541857 ],
        [0.5181227 ],
        [0.5519946 ]],

       [[0.13598263],
        [0.13924287],
        [0.8503056 ],
        ...,
        [0.51799875],
        [0.5519425 ],
        [0.55665845]],

       ...,

       [[0.14843623],
        [0.13219987],
        [0.9411495 ],
        ...,
        [0.7892836 ],
        [0.78584844],
        [0.80978465]],

       [[0.14893645],
        [0.13191606],
        [0.94713765],
        ...,
        [0.78584856],
        [0.8097847 ],
        [0.79182285]],

       [[0.14908728],
        [0.13178378],
        [0.9504425 ],
        ...,
        [0.80978477],
        [0.79182273],
        [0.78068566]]], dtype=float32)

In [23]:

## save model
regressorRNN.save('/content/rnn_model.keras') # Added a filename with .keras extension

In [25]:
## load model
import keras
model = keras.models.load_model('/content/rnn_model.keras')

In [42]:
len(simple_rnn_predicted_price[0])

160

In [27]:
# Long Short Term Memory
lstm = Sequential()
lstm.add(LSTM(32,
                return_sequences=True,
                input_shape=(X_train.shape[1],1),
                activation='tanh'))
lstm.add(Dropout(0.05))
lstm.add(LSTM(32,
                return_sequences=True,
                input_shape=(X_train.shape[1],1),
                activation='tanh'))
lstm.add(Dropout(0.05))
lstm.add(LSTM(32,
                return_sequences=True,
                input_shape=(X_train.shape[1],1),
                activation='tanh'))
lstm.add(Dropout(0.05))
lstm.add(Dense(units=1))
lstm.summary()


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [31]:
# compile the model
import tensorflow as tf
lstm.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
                     loss='mean_squared_error')
lstm.fit(X_train,y_train,epochs=50,batch_size=16)

Epoch 1/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.4459
Epoch 2/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.2449
Epoch 3/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.1506
Epoch 4/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0974
Epoch 5/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0774
Epoch 6/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0624
Epoch 7/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0583
Epoch 8/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 0.0554
Epoch 9/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0547
Epoch 10/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0547

<keras.src.callbacks.history.History at 0x7d91ab189e50>

In [30]:
# Mean squared error (MSE)

## understand sequence learning concepts: rnn, lstm

In [32]:
# Long Short Term Memory
gru = Sequential()
gru.add(SimpleRNN(32,
                return_sequences=True,
                input_shape=(X_train.shape[1],1),
                activation='tanh'))
gru.add(Dropout(0.05))
gru.add(LSTM(32,
                return_sequences=True,
                input_shape=(X_train.shape[1],1),
                activation='tanh'))
gru.add(Dropout(0.05))
gru.add(GRU(32,
                return_sequences=True,
                input_shape=(X_train.shape[1],1),
                activation='tanh'))
gru.add(Dropout(0.05))
gru.add(Dense(units=1))
gru.summary()


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [34]:
# compile the model
import tensorflow as tf
gru.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
                     loss='mean_squared_error')
gru.fit(X_train,y_train,epochs=50,batch_size=16)

Epoch 1/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 163ms/step - loss: 0.0980
Epoch 2/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 166ms/step - loss: 0.0559
Epoch 3/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 161ms/step - loss: 0.0486
Epoch 4/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 135ms/step - loss: 0.0486
Epoch 5/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 133ms/step - loss: 0.0472
Epoch 6/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 159ms/step - loss: 0.0449
Epoch 7/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 132ms/step - loss: 0.0443
Epoch 8/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 159ms/step - loss: 0.0450
Epoch 9/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 132ms/step - loss: 0.0441
Epoch 10/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 154ms/step -

<keras.src.callbacks.history.History at 0x7d91ab245490>

In [35]:
# Long Short Term Memory
gru_only = Sequential()
gru_only.add(GRU(32,
                return_sequences=True,
                input_shape=(X_train.shape[1],1),
                activation='tanh'))
gru_only.add(Dropout(0.05))
gru_only.add(GRU(32,
                return_sequences=True,
                input_shape=(X_train.shape[1],1),
                activation='tanh'))
gru_only.add(Dropout(0.05))
gru_only.add(GRU(32,
                return_sequences=True,
                input_shape=(X_train.shape[1],1),
                activation='tanh'))
gru_only.add(Dropout(0.05))
gru_only.add(Dense(units=1))
gru_only.summary()

In [37]:
# compile the model
import tensorflow as tf
gru_only.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
                     loss='mean_squared_error')
gru_only.fit(X_train,y_train,epochs=50,batch_size=16)

Epoch 1/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - loss: 0.3148
Epoch 2/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.1353
Epoch 3/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0731
Epoch 4/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0484
Epoch 5/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0430
Epoch 6/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0382
Epoch 7/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0417
Epoch 8/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0382
Epoch 9/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0381
Epoch 10/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0377

<keras.src.callbacks.history.History at 0x7d91b1f5bf90>

In [38]:

### experiment with n_days [change the value of n_days - [10,500]]
## run the code and get the results in that chat window

In [40]:
# Combine the last n_days of the training set and the test set using pd.concat
combined_series = pd.concat([training_set[-n_days:], test_set])

# Prepare the test data using the combined series
X_test, y_test = prepare_data(sc.transform(np.array(combined_series).reshape(-1, 1)), n_days)