In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, GRU, Bidirectional
from keras.optimizers import SGD
import math
from sklearn.metrics import mean_squared_error

# Load the complete IBM price history from CSV. The 'Date' column is parsed
# as datetimes and used as the row index.
# NOTE(review): the path is an absolute local Windows path; consider making it
# configurable so the notebook runs on other machines.
dataset = pd.read_csv(
    'C:/data/ML_Models/RNN/IBM.csv',
    parse_dates=['Date'],
    index_col='Date',
)
# Preview the first rows of the loaded frame
dataset.head()

ModuleNotFoundError: No module named 'matplotlib'

#### Column selection and dataset slicing
In this case the "Close" column is selected, which holds IBM's closing price.

In [None]:
# Split the 'Close' price column chronologically: rows up to and including
# 2016 form the training set, rows from 2017 onward form the test set.
# The column is selected by name rather than by position so the split does
# not depend on the column order of the CSV. The list selector ['Close']
# keeps the result 2-D with shape (n_samples, 1), which the scaler requires.
training_set = dataset.loc[:'2016', ['Close']].values
test_set = dataset.loc['2017':, ['Close']].values

#### Apply Feature Scaling to the Data set
Min-max normalization is applied for feature scaling, using scikit-learn's `MinMaxScaler`.

In [None]:
# Fit a min-max scaler on the training prices only, then rescale those same
# prices with it. The fitted scaler `sc` is kept for transforming the test
# inputs and for inverting the model's predictions.
sc = MinMaxScaler()
sc.fit(training_set)
training_set_scaled = sc.transform(training_set)

##### 'Close' attribute for prices visualization

In [None]:
# Plot the closing price, drawing the training period (through 2016) and the
# test period (2017 onward) as two separate lines.
close_prices = dataset['Close']
close_prices[:'2016'].plot(figsize=(16,4), legend=True)
close_prices['2017':].plot(figsize=(16,4), legend=True)
plt.title('IBM stock price')
plt.legend(['Training set','Test set'])
plt.show()

#### Specify the Number of Timesteps
Timesteps specify how many previous observations should be considered when the recurrent neural network makes a prediction about the current observation. 
In this case, 60 timesteps are used, meaning that for every day the neural network predicts, it considers the previous 60 days of stock prices to determine its output.

In [None]:
# Build supervised-learning samples with a sliding window: each input is the
# previous TIMESTEPS scaled prices and the target is the scaled price that
# immediately follows them.
TIMESTEPS = 60
X_train_data = []
y_train_data = []
# The loop runs to the actual length of the training series rather than a
# hard-coded row count, so no rows are silently dropped and no IndexError is
# raised if the size of the dataset changes.
for i in range(TIMESTEPS, len(training_set_scaled)):
    X_train_data.append(training_set_scaled[i-TIMESTEPS:i, 0])
    y_train_data.append(training_set_scaled[i, 0])

# Convert the Python lists to NumPy arrays
X_train_data, y_train_data = np.array(X_train_data), np.array(y_train_data)

# Keras recurrent layers take 3-D input (samples, timesteps, features), so a
# trailing feature axis of size 1 is added. Using -1 for the sample axis also
# keeps the reshape valid when no windows could be built.
X_train_data = np.reshape(X_train_data, (-1, TIMESTEPS, 1))

This outputs:

In [None]:
# Show the (samples, timesteps, features) shape of the training inputs
print(np.shape(X_train_data))

### Building Recurrent Neural Network
### 1. Long Short Term Memory (LSTM)
The Keras `Sequential` class is used, i.e. layers are added one after another to build the recurrent neural network.
##### Dropout Regularization
Dropout regularization is a technique used to avoid overfitting when training neural networks.

Epochs: the number of iterations for the recurrent neural network to be trained on. 

The batch size: the size of batches that the network will be trained in through each epoch.

The Adam optimizer is used; it is a workhorse optimizer that is useful in a wide variety of neural network architectures.

In [None]:
# Build the stacked-LSTM regressor as a Keras Sequential model
rnn = Sequential()

# First LSTM layer. It declares the input shape (timesteps, 1 feature);
# return_sequences=True hands the full sequence to the next recurrent layer.
rnn.add(LSTM(units=50, return_sequences=True, input_shape=(X_train_data.shape[1],1)))
# Dropout (rate 0.2) follows every LSTM layer to reduce overfitting
rnn.add(Dropout(0.2))

# Second LSTM layer
rnn.add(LSTM(units=50, return_sequences=True))
rnn.add(Dropout(0.2))

# Third LSTM layer
rnn.add(LSTM(units=50, return_sequences=True))
rnn.add(Dropout(0.2))

# Fourth LSTM layer: return_sequences is left at its default, so only the
# final step's output is passed on to the dense output layer.
rnn.add(LSTM(units=50))
rnn.add(Dropout(0.2))

# Output layer: a single unit producing the predicted (scaled) closing price.
# Only one Dense(1) layer is added; stacking a second Dense(1) on top of it
# adds nothing for a scalar regression output.
rnn.add(Dense(units=1))

# Compile with the Adam optimizer and mean-squared-error loss.
# No 'accuracy' metric is requested: accuracy is a classification metric and
# is not meaningful for a continuous regression target.
rnn.compile(optimizer='adam', loss='mean_squared_error')

# Fit the network on the training windows
rnn.fit(X_train_data, y_train_data, epochs=50, batch_size=32)

#### Building The Test Data Set
Create an array to hold stock prices from Jan 2017 and the 60 trading days prior to January 2017

In [None]:
# Assemble the model inputs for the test period: the test-period closing
# prices preceded by the last 60 closing prices of the training period, so
# that the first test day already has a full 60-step look-back window.
close_train = dataset["Close"][:'2016']
close_test = dataset["Close"]['2017':]
dataset_total = pd.concat((close_train, close_test), axis=0)
first_input_row = len(dataset_total) - len(test_set) - 60
inputs = dataset_total[first_input_row:].values
# Reshape to a single-column 2-D array, the layout the scaler works on
inputs = inputs.reshape(-1, 1)
# Scale with the scaler that was fitted on the training set
inputs = sc.transform(inputs)


#### Making Predictions
Predictions are made on the test data set by calling the `predict` method on the `rnn` object.

In [None]:
# Group the scaled inputs into look-back windows, one window per test day.
# The window length is taken from the training inputs and the loop bound from
# `inputs`, instead of hard-coded 60/311, so the cell keeps working if the
# test period changes size.
window = X_train_data.shape[1]
X_test = []
for i in range(window, len(inputs)):
    X_test.append(inputs[i-window:i, 0])
X_test = np.array(X_test)
# Add the trailing feature axis: (samples, timesteps, 1)
X_test = np.reshape(X_test, (-1, window, 1))
# Predict with the LSTM model, then map the scaled outputs back to prices
predicted_stock_price = rnn.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

#print(predicted_stock_price.shape)

In [None]:
# Display the windowed test inputs built in the previous cell
X_test

#### Plot the Predictions
The plot compares the predicted stock prices with IBM's actual stock price.

In [None]:
# Overlay the actual test-period prices and the LSTM predictions
for series, colour, caption in (
    (test_set, 'red', 'Actual Stock Price'),
    (predicted_stock_price, 'green', 'Predicted Stock Price'),
):
    plt.plot(series, color=colour, label=caption)
plt.xlabel('Time')
plt.ylabel('IBM Stock Price')
plt.title('IBM Stock Price Prediction')
plt.legend()
plt.show()


#### Evaluating the Model
Root Mean Square Error (RMSE) is a standard way to measure the error of a model in predicting quantitative data.

In [None]:
# Root mean squared error between the actual and LSTM-predicted prices
mse = mean_squared_error(test_set, predicted_stock_price)
rmse = math.sqrt(mse)
print("The root mean squared error is {}.".format(rmse))

The lower the RMSE, the better a given model is able to "fit" a dataset

With an RMSE of 2.4, the model is doing well.

### 2. Gated Recurrent Units (GRU)
GRU is similar to LSTM, but it has fewer gates. Also, it relies solely on a hidden state for memory transfer between recurrent units, so there is no separate cell state.
#### Adding Layers to GRU RNN 
The GRU RNN is a Sequential Keras model. After initializing the Sequential model, the layers need to be added. The first layer to add is a Gated Recurrent Unit layer.

In [None]:
# Build the stacked-GRU regressor as a Keras Sequential model
regressorGRU = Sequential()

# First GRU layer. It declares the input shape (timesteps, 1 feature);
# Dropout (rate 0.2) follows every GRU layer as regularisation.
regressorGRU.add(GRU(units=50, return_sequences=True, input_shape=(X_train_data.shape[1],1), activation='tanh'))
regressorGRU.add(Dropout(0.2))

# Second GRU layer. input_shape is only given on the first layer; later
# layers take their input shape from the layer before them.
regressorGRU.add(GRU(units=50, return_sequences=True, activation='tanh'))
regressorGRU.add(Dropout(0.2))

# Third GRU layer
regressorGRU.add(GRU(units=50, return_sequences=True, activation='tanh'))
regressorGRU.add(Dropout(0.2))

# Fourth GRU layer: return_sequences is left at its default, so only the
# final step's output is passed on to the dense output layer.
regressorGRU.add(GRU(units=50, activation='tanh'))
regressorGRU.add(Dropout(0.2))

# Output layer: a single unit producing the predicted (scaled) closing price
regressorGRU.add(Dense(units=1))

# Compile with SGD (momentum 0.9) and mean-squared-error loss.
# No 'accuracy' metric is requested: accuracy is a classification metric and
# is not meaningful for a continuous regression target.
# NOTE(review): the `decay` keyword may be rejected by newer Keras optimizer
# implementations -- confirm against the installed Keras version.
regressorGRU.compile(
    optimizer=SGD(learning_rate=0.01, decay=1e-7, momentum=0.9, nesterov=False),
    loss='mean_squared_error',
)

# Fit the network on the training windows
regressorGRU.fit(X_train_data, y_train_data, epochs=50, batch_size=150)

The same training data is used to train the GRU model. To train a model in Keras, we simply call its `fit` method.

#### Test the GRU Model
The same test data set that was used for the LSTM predictions is used here.

In [None]:
# Rebuild the look-back windows for the test period, one window per test day.
# The window length is taken from the training inputs and the loop bound from
# `inputs`, instead of hard-coded 60/311, so the cell keeps working if the
# test period changes size.
window = X_train_data.shape[1]
X_test = []
for i in range(window, len(inputs)):
    X_test.append(inputs[i-window:i, 0])
X_test = np.array(X_test)
# Add the trailing feature axis: (samples, timesteps, 1)
X_test = np.reshape(X_test, (-1, window, 1))
# Predict with the GRU model, then map the scaled outputs back to prices
GRU_predicted_stock_price = regressorGRU.predict(X_test)
GRU_predicted_stock_price = sc.inverse_transform(GRU_predicted_stock_price)

#### Predictions Visualization
In this case we visualize the predicted values and the actual stock price.

In [None]:
# Overlay the actual test-period prices and the GRU predictions
for series, colour, caption in (
    (test_set, 'red', 'Actual Stock Price'),
    (GRU_predicted_stock_price, 'green', 'Predicted Stock Price'),
):
    plt.plot(series, color=colour, label=caption)
plt.xlabel('Time')
plt.ylabel('IBM Stock Price')
plt.title('IBM Stock Price Prediction')
plt.legend()
plt.show()

#### Evaluating the Model
Root Mean Square Error (RMSE) is a standard way to measure the error of a model in predicting quantitative data.

The lower the RMSE, the better a given model is able to "fit" a dataset.

In [None]:
# Root mean squared error between the actual and GRU-predicted prices
mse = mean_squared_error(test_set, GRU_predicted_stock_price)
rmse = math.sqrt(mse)
print("The root mean squared error is {}.".format(rmse))

#### Interpretation
From the visualization above, after 50 epochs the model's predictions follow the actual test-set prices quite well. It predicts the overall pattern correctly.

In [None]:
#import pickle
# open a file where to store the data
#file=open('lstm_gru_model.pkl','wb')

# dump information to that file
#pickle.dump(rnn, file)


In [None]:
# Loading model to compare results