### Google Stock Price Prediction

#### 1. Fire the System

In [1]:
# importing libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
from datetime import datetime

from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

%matplotlib inline

#### 2. Read Data

In [2]:
# importing train data

dataset_train = pd.read_csv('GOOG.csv')
dataset_train.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2004-08-19,49.813286,51.835709,47.800831,49.982655,49.982655,44871300
1,2004-08-20,50.316402,54.336334,50.062355,53.95277,53.95277,22942800
2,2004-08-23,55.168217,56.528118,54.321388,54.495735,54.495735,18342800
3,2004-08-24,55.4123,55.591629,51.591621,52.239193,52.239193,15319700
4,2004-08-25,52.284027,53.798351,51.746044,52.802086,52.802086,9232100


In [3]:
dataset_train.shape

(4006, 7)

In [4]:
dataset_train.dtypes

Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

In [5]:
dataset_train.isnull().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [6]:
# selecting features for training and predictions

cols = list(dataset_train)[1:6]

# extract dates (will be used in visualization)

datelist_train = list(dataset_train['Date'])
dataset_train['Date'] = pd.to_datetime(dataset_train['Date'], dayfirst=True)

print(f'Training set shape: {dataset_train.shape}')
print(f'All Timestamps: {len(datelist_train)}')
print(f'Selected Features: {cols}')

Training set shape: (4006, 7)
All Timestamps: 4006
Selected Features: ['Open', 'High', 'Low', 'Close', 'Adj Close']


  dataset_train['Date'] = pd.to_datetime(dataset_train['Date'], dayfirst=True)


#### 3. Data Pre-processing

In [7]:
# removing all the commas and converting data to matrix shape format

dataset_train = dataset_train[cols].astype(str)

for i in cols:
    for j in range(0, len(dataset_train)):
        dataset_train[i][j] = dataset_train[i][j].replace(',','')

dataset_train = dataset_train.astype(float)

# using multiple features (predictors)
training_set = dataset_train.to_numpy()

print(f'Shape of training set : {training_set.shape}')
training_set

Shape of training set : (4006, 5)


array([[  49.813286,   51.835709,   47.800831,   49.982655,   49.982655],
       [  50.316402,   54.336334,   50.062355,   53.95277 ,   53.95277 ],
       [  55.168217,   56.528118,   54.321388,   54.495735,   54.495735],
       ...,
       [1523.130005, 1535.329956, 1498.      , 1513.640015, 1513.640015],
       [1500.      , 1518.689941, 1486.310059, 1518.      , 1518.      ],
       [1521.619995, 1523.439941, 1498.420044, 1515.550049, 1515.550049]])

In [8]:
# feature scaling

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
training_set_scaled = sc.fit_transform(training_set)

sc_predict = StandardScaler()
sc_predict.fit_transform(training_set[:,0:1])


array([[-1.27195197],
       [-1.27058974],
       [-1.25745309],
       ...,
       [ 2.71716347],
       [ 2.65453724],
       [ 2.713075  ]])

In [9]:
# creating a data structure with 90 timestamps and 1 output

x_train = []
y_train = []

n_past = 90 #number of past days we need to predict the future
n_future = 60 #number of days we need to predict into the future

for i in range(n_past,len(training_set_scaled) - n_future + 1):
    x_train.append(training_set_scaled[i-n_past:i,0:dataset_train.shape[1]-1])
    y_train.append(training_set_scaled[i+n_future-1:i+n_future,0])

x_train,y_train = np.array(x_train),np.array(y_train)

print(f'x_train shape: {x_train.shape}')
print(f'y_train shape: {y_train.shape}')



x_train shape: (3857, 90, 4)
y_train shape: (3857, 1)


#### Create a model and training

##### Building the LSTM based Neural network

In [10]:
# importing libraries and packages from keras

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.optimizers import Adam

In [11]:
# model building

model = Sequential()
model.add(LSTM(units = 64,
               return_sequences=True,
               input_shape = (n_past,dataset_train.shape[1]-1)))
model.add(LSTM(units = 10,
          return_sequences = False))
model.add(Dropout(0.25))
model.add(Dense(units=1,
                activation = 'linear'))
model.compile(optimizer = Adam(learning_rate=0.01),
              loss = 'mean_squared_error')

  super().__init__(**kwargs)


In [16]:
#training

es = EarlyStopping(monitor = 'val_loss',min_delta=1e-10,patience=10,verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss',factor=0.5,patience=10,verbose=1)
mcp = ModelCheckpoint(filepath='weights.weights.h5',monitor='val_loss',verbose=1,
save_best_only=True,save_weights_only=True)

tb = TensorBoard('logs')

history = model.fit(x_train,y_train,shuffle=True,epochs = 30,callbacks=[es,rlr,mcp,tb],
validation_split=0.2,verbose =1, batch_size=256)

Epoch 1/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 273ms/step - loss: 0.1225
Epoch 1: val_loss improved from inf to 0.87081, saving model to weights.weights.h5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 396ms/step - loss: 0.1191 - val_loss: 0.8708 - learning_rate: 0.0100
Epoch 2/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240ms/step - loss: 0.0376
Epoch 2: val_loss improved from 0.87081 to 0.40711, saving model to weights.weights.h5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 284ms/step - loss: 0.0375 - val_loss: 0.4071 - learning_rate: 0.0100
Epoch 3/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241ms/step - loss: 0.0315
Epoch 3: val_loss did not improve from 0.40711
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 275ms/step - loss: 0.0316 - val_loss: 0.5594 - learning_rate: 0.0100
Epoch 4/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241ms