In [62]:
# Approach Adopted: LSTM (Recurrent Neural Network)
# Task: Stock Prediction on the TATA dataset.

# Necessary imports 

from keras.models import Sequential
from keras.layers import Dense
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.layers import Dropout
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import math

In [63]:
# Reading the corpus

df = pd.read_csv('NSE-TATAGLOBAL.csv')
df.head()

Unnamed: 0,Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
0,2018-09-28,234.05,235.95,230.2,233.5,233.75,3069914,7162.35
1,2018-09-27,234.55,236.8,231.1,233.8,233.25,5082859,11859.95
2,2018-09-26,240.0,240.0,232.5,235.0,234.25,2240909,5248.6
3,2018-09-25,233.3,236.75,232.0,236.25,236.1,2349368,5503.9
4,2018-09-24,233.55,239.2,230.75,234.0,233.3,3423509,7999.55


In [64]:
# We will create a new df using Close column since that is what we want to predict

d = df.filter(['Close'])
dataset = d.values
size = len(dataset)

trainingSize = math.ceil(size * 0.6)

# Normalize data and make values between 0 and 1. 

Scaler = MinMaxScaler(feature_range = (0,1))
scaled_data = Scaler.fit_transform(dataset)

# Make the training data. X --> Independent Y --> Dependent

trainX = []
trainY = []
train = scaled_data[0:trainingSize, :] # 0 - 80 percent of the dataset
for i in range(60, len(train)):        # 60 timesteps
    trainX.append(train[i-60:i,0])
    trainY.append(train[i,0])

# Convert to Numpy Array

train_x = np.array(trainX)
train_y = np.array(trainY)

# Data is in 2 dimensional form. We need to convert it to 3 dimensional form so that it can be used by the KERAs LSTM

train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))   # no. of rows, timestep count, and number of features.

In [65]:
# Make the testing data. X --> Independent Y --> Dependent

testX = []
test = scaled_data[trainingSize - 60 :,:] # 0 - 80 percent of the dataset
testY = dataset[trainingSize:,:]
for i in range(60, len(test)):        # 60 timesteps
    testX.append(test[i-60:i,0])

testX = np.array(testX)
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))

In [66]:
# Create and train the LSTM

regressor = Sequential()
regressor.add(LSTM(units = 50, return_sequences = True, input_shape = (train_x.shape[1], 1))) # Input number of steps and features. 50 neurons
regressor.add(LSTM(units = 50, return_sequences = False))                                     # Return sequence = false since no more lstm layer
regressor.add(Dense(25))
regressor.add(Dense(1))                                                                       # Output layer
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')

# Fitting the RNN to the Training set
regressor.fit(train_x, train_y, epochs = 1, batch_size = 1)



<keras.callbacks.History at 0x1fba5cbb400>

In [67]:
# Generate predictions

p = regressor.predict(testX)
p = Scaler.inverse_transform(p)

In [68]:
# Find Root Mean Squared Error

np.sqrt(np.mean(p-testY)**2)       # Predicted - actual

6.5503176171305135