## Packages

In [1]:
"""
Importing the required standard library modules that are required for the project.  

Importing visualize.py, a user-defined module developed specifically for the project.
"""

import math
import numpy as np
import pandas as pd
import keras
import tensorflow as tf

from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout,Activation

import visualize as vs 

## Importing Data

In [2]:
"""
Importing the locally stored dataset.  

: variable StockPrice: DataFrame with columns as ["Date","Symbol","Open","Close","Volume","VWAP"]
"""


StockPrice = pd.read_excel("STOCKPRICE.xlsx")

In [3]:
"""
List of all Stock Symbols present in dataset.  
Printing the number of stock companies present.

: variable stockSymbols: A list of all stock companies present in the dataset.
"""


stockSymbols = ["ADANIPORTS", "ASIANPAINT", "AXISBANK", "BAJAJFINSV", "BAJFINANCE", 
                "BPCL", "BRITANNIA", "CIPLA", "COALINDIA", "DRREDDY", "EICHERMOT", "GAIL", "GRASIM", 
                "HCLTECH", "HDFC", "HDFCBANK", "HEROHONDA", "HEROMOTOCO", "HINDALCO", "HINDUNILVR", 
                "ICICIBANK", "INDUSINDBK", "INFRATEL", "INFY", "IOC", "ITC", "JSWSTEEL", "KOTAKBANK", 
                "LT", "M&M", "MARUTI", "NESTLEIND", "NTPC", "ONGC", "POWERGRID", "RELIANCE",
                "SBIN", "SESAGOA", "SHREECEM", "SUNPHARMA", "TATAMOTORS", "TATASTEEL", "TCS",
                "TECHM", "TITAN", "ULTRACEMCO", "UPL", "VEDL","WIPRO", "ZEEL",
               ]


len(stockSymbols)

50

## Selecting Stock

In [4]:
"""
Accepting user input to predict selected stock. Finding index of selected stock in stockSymbols.

: variable stockName: Stores the stock name input by the user.
: variable i: Stores the index of stockName.
"""


stockName = input("Enter the Stock Name: ")
stockName = stockName.upper()


i = stockSymbols.index(stockName)
print(i)

Enter the Stock Name: adaniports
0


In [21]:
"""
Creating a pandas DataFrame of only selected stock.

: variable df: DataFrame of stockName with columns as ["Date","Symbol","Open","Close","Volume","VWAP"]
"""


df = StockPrice[StockPrice.Symbol == stockSymbols[i]]
print(df.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2197 entries, 0 to 2196
Data columns (total 6 columns):
Date      2197 non-null datetime64[ns]
Symbol    2197 non-null object
Open      2197 non-null float64
Close     2197 non-null float64
Volume    2197 non-null int64
VWAP      2197 non-null float64
dtypes: datetime64[ns](1), float64(3), int64(1), object(1)
memory usage: 120.1+ KB
None


In [22]:
"""
Dropping all columns except Close and changing the index of the dataframe to store dates.

: variable df: DataFrame of stockName with columns as ["Date", "Close"] and index as dates.
"""


df["Date"] = pd.to_datetime(df["Date"])
df.set_axis(df["Date"], inplace=True)
df.drop(columns=["Open", "VWAP", "Symbol", "Volume"], inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [23]:
""" Displaying first 5 values of the selected stock. """


display(df.head())

Unnamed: 0_level_0,Date,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-01-17,2012-01-17,140.0
2012-01-18,2012-01-18,141.7
2012-01-19,2012-01-19,149.4
2012-01-20,2012-01-20,155.4
2012-01-23,2012-01-23,146.75


## Basic Visualisation

In [24]:
""" Plotting the Close price of selected stock using plot_basic(data,title) present in visualize.py """


vs.plot_basic(df,stockSymbols[i])

## Splitting into Train and Test

In [25]:
"""
Splitting the selected stock dataset into training data and test data in an 80%-20% proportion.
Displaying the size of training dataset and testing dataset.

: varible close_data: A numpy array that contains only Close prices of stockName
: variable split_percent: Stores proportion of splitting the dataset into training dataset and testing dataset
: variable close_train: A numpy array that contains 80% of close_data that will be used to train the model.
: variable close_test: A numpy array that contains 20% of close_data that will be used to test the performance of the model.
: variable date_train: A pandas Series that contains 80% of Dates required for visualising training dataset
: variable date_test: A pandas Series that contains 20% of Dates required for visualising testing dataset
"""


close_data = df["Close"].values
close_data = close_data.reshape((-1,1))

split_percent = 0.80
split = int(split_percent * len(close_data))

close_train = close_data[: split]
close_test = close_data[split:]

date_train = df["Date"][: split]
date_test = df["Date"][split:]

print(len(close_train))
print(len(close_test))

1757
440


In [26]:
"""
Creating a TimeSeriesGenerator for which is fed to train and test the model.

: variable look_back: Stores the number of previous days' data to use, to predict the value for the next day. 
: variable train_generator: A time series sample with input and output components as training dataset.
: variable test_generator: A time series sample with input and output components as testing dataset.
"""


look_back = 30

train_generator = TimeseriesGenerator(close_train, close_train, length=look_back, batch_size = 20)     
test_generator = TimeseriesGenerator(close_test, close_test, length=look_back, batch_size = 1)


##  Building the LSTM Model

In [27]:
"""
Building the model architecture with 2 layers 
(1 LSTM layer with 10 neurons & linear activation function, 1 Dense Layer with 1 neuron).
Compile defines the loss function, the optimizer and the metrics. A compiled model is needed to train because training 
uses the loss function and the optimizer.

: variable num_epochs: Stores the number of epochs the model performs.
: variable fittModel: Contains the trained model.
"""


model = Sequential()

model.add(
    LSTM(10,
        activation="linear", 
        input_shape = (look_back, 1),
        )
)

model.add(Dense(1))


model.compile(optimizer="adam", loss="mse")


num_epochs = 10


fittModel = model.fit_generator(train_generator, epochs = num_epochs, verbose = 2)

Epoch 1/10
87/87 - 1s - loss: 99846.3594
Epoch 2/10
87/87 - 1s - loss: 77987.1484
Epoch 3/10
87/87 - 0s - loss: 10181.6201
Epoch 4/10
87/87 - 0s - loss: 148.1138
Epoch 5/10
87/87 - 1s - loss: 110.3436
Epoch 6/10
87/87 - 0s - loss: 83.4740
Epoch 7/10
87/87 - 0s - loss: 70.4404
Epoch 8/10
87/87 - 0s - loss: 65.5012
Epoch 9/10
87/87 - 1s - loss: 70.0610
Epoch 10/10
87/87 - 1s - loss: 52.8715


In [28]:
""" Gives the architecture and summary of the model. """


model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 10)                480       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 11        
Total params: 491
Trainable params: 491
Non-trainable params: 0
_________________________________________________________________


### EPOCHS VS LOSS

In [29]:
"""
Visualising Epochs vs Loss by using a user-defined function in visualize.py

: variable loss_train: A list that stores the loss of each epoch when training the model.
: variable epochs: A list that stores the number of epochs.
"""


loss_train = fittModel.history["loss"]
epochs = list(range(1, num_epochs+1))

vs.plot_epochs_loss(epochs, loss_train)

## Model Evaluation:

In [30]:
"""
Evaluating the mean squared error and root mean squared error of the model during training and testing.

: variable trainScore: Stores the value of mean squared error of the model when trained.
: variable testScore: Stores the value of mean squared error of the model when tested.
"""


trainScore = model.evaluate(train_generator, verbose = 2)
print("Train Score: %.8f MSE (%.8f RMSE)" % (trainScore, math.sqrt(trainScore)))

print("\n")

testScore = model.evaluate(test_generator, verbose = 2)
print("Test Score: %.8f MSE (%.8f RMSE)" % (testScore, math.sqrt(testScore)))

87/87 - 0s - loss: 56.4943
Train Score: 56.49431229 MSE (7.51626984 RMSE)


410/410 - 1s - loss: 117.0277
Test Score: 117.02768707 MSE (10.81793359 RMSE)


## Prediction Using Test Data

In [31]:
"""
Evaluating the model using the testing data and obtaining the predictions for the test dataset. 
Reshaping close_train, close_test and prediction for visualising.

: variable prediction: A numpy array that stores the prediction value of the test dataset.
"""


prediction = model.predict_generator(test_generator)

close_train = close_train.reshape((-1))
close_test = close_test.reshape((-1))
prediction = prediction.reshape((-1))


In [32]:
""" Visualising the LSTM predictions using user defined function in visualize.py """


vs.plot_lstm_prediction(close_train, date_train, close_test, date_test, prediction, stockSymbols[i])

## Forecasting

In [33]:
""" Reshaping close_data for forecasting and defining functions predict and predict_data. """


close_data = close_data.reshape((-1))
look_back = 30


def predict(num_prediction, model):
    
    """
    To forecast the future close prices of the stock.
    
    :param num_prediction: Stores the number of days the model will forecast in the future
    :param model: Stores the LSTM model
    :return : A list of forecasted close prices for the selected stock
    """
    
    prediction_list = close_data[-look_back:]
    
    for _ in range(num_prediction):
        x = prediction_list[-look_back:]
        x = x.reshape((1, look_back, 1))
        out = model.predict(x)[0][0]
        prediction_list = np.append(prediction_list, out)
    prediction_list = prediction_list[look_back-1:]
        
    return prediction_list
    
    
def predict_dates(num_prediction):
    
    """
    To create a list of the future dates based on the end of the stock.
    
    :param num_prediction : Stores the number of days the model will forecast in the future
    :return : A list of future date values
    """
    
    last_date = df["Date"].values[-1]
    prediction_dates = pd.date_range(last_date, periods = num_prediction + 1).tolist()
    return prediction_dates



In [34]:
"""
:variable num_prediction: stores the number of days the model will forecast in the future
:variable forecast: A list of forecasted close prices for the selected stock
:variable forecast_dates : A list of future date values required for visualising
"""


num_prediction = 45 
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)

In [35]:
""" Displaying the last 10 values of the forecast list. """


forecast[-10:-1]

array([302.32824707, 300.38574219, 298.54373169, 296.8298645 ,
       295.22976685, 293.71176147, 292.24954224, 290.83242798,
       289.45535278])

In [36]:
""" Visualising the forecasted values by the model using user-defined function in visualize.py """


vs.plot_lstm_forecasting(close_train, date_train, close_test, date_test, prediction, forecast_dates, forecast, stockSymbols[i])