# Import the Libraries

In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.layers.core import Dense, Activation, Dropout

# specify to ignore warning messages
import warnings
warnings.filterwarnings("ignore") 



Using TensorFlow backend.


# Preprocessing the stock prices

In [2]:
stock_names = ['Apple','Netflix','Google']
# Define empty dataframe to store the stock closing prices
df = pd.DataFrame()
for stock in stock_names:
    # Read the stock closing prices with date as the index column.
    df[stock] = pd.read_csv(stock+'.csv',parse_dates= [0], index_col = 'date',skiprows = [1]).close
# Rearange the data in ascending chronological order
df = df.sort_index()
# Forward fill the missing holiday and weekend stock closing prices
df = df.resample('D').ffill()
# Log-transform the closing price data frame
df_log = np.log(df)
# Define a random seed for the test results
np.random.seed(0)
# Display first five five rows of the data frame
df_log.head()

Unnamed: 0_level_0,Apple,Netflix,Google
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2009-02-23,2.519421,1.625882,5.116364
2009-02-24,2.556677,1.639269,5.161938
2009-02-25,2.566709,1.631767,5.150848
2009-02-26,2.544857,1.631473,5.137707
2009-02-27,2.546206,1.644245,5.140106


# Mean Absolute Percentage Error

In [3]:
def MAPE(y_true,y_pred):
    """  input:
    y_true = True value of variables
    y_pred = predicted values of variable
    output:
    MAPE = mean absolute percentage error
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred) # make sure input is flat   
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Long Short Term Memory Model

In [4]:
def LSTM_model(indata,test_size, WINDOW = 20, scale = 1):
    """ input
    indata: time series
    test_size = prediction period length
    WINDOW = number of past lags to consider, default value of last 20 days
    scale = 1/0 if data to be scaled/or not
    """
    # preprocess the data
    data = indata.values
    data = data.reshape(-1,1)
    test = data[-test_size:]
    ###########################################
    #If scale = 1 normalize the data in 0,1 range
    if scale:
        scaler = MinMaxScaler(feature_range=(0, 1))
        data = scaler.fit_transform(data)
    ###########################################
    ## Convert the data into features and varaible form
    ###########################################
    # Train features and variables
    trainX, trainY = [], []
    for i in range(WINDOW,len(indata)-test_size):
        trainX.append(data[i-WINDOW:i,0])
        trainY.append(data[i,0])
    trainX, trainY = np.array(trainX), np.array(trainY)
    trainX = np.reshape(trainX, (trainX.shape[0],trainX.shape[1],1))
    ###########################################
    # Test features and varaibles
    test_data = indata[len(indata) - test_size - WINDOW:].values
    test_data = test_data.reshape(-1,1)
    if scale:
        test_data  = scaler.transform(test_data)
    testX, testY = [],[]
    for i in range(WINDOW,test.shape[0]):
        testX.append(test_data[i-WINDOW:i,0])
        testY.append(test_data[i,0])
    testX, testY = np.array(testX), np.array(testY)
    testX = np.reshape(testX, (testX.shape[0],testX.shape[1],1))
    ###########################################
    # LSTM model training
    # Sequential model is trained
    model = Sequential()
    # Add first layer with 50 neurons
    model.add(LSTM(units=50, return_sequences=True, input_shape=(trainX.shape[1],1)))
    # Add second layer with 50 neurons
    model.add(LSTM(units=50))
    # Final single variable output
    model.add(Dense(1))
    ###########################################
    # Compile the model with MSE and Adam optimizer
    model.compile(loss='mean_squared_error', optimizer='adam')
    # Fit the model using training features and variables
    model.fit(trainX, trainY, epochs=1, batch_size=1, verbose=2)
    ###########################################
    # Compute the yhat (predicted varaible values)
    train_predict = model.predict(trainX)
    test_predict = model.predict(testX)
    ###########################################
    # Return the data in not scaled form
    if scale:
        train_predict = scaler.inverse_transform(train_predict)
        test_predict = scaler.inverse_transform(test_predict)

    return train_predict,test_predict

# LSTM Fit and Predictions

In [5]:
WINDOW = 20
len_test = len(df['2019-02-01':])
df_lstm = pd.DataFrame()
for i in range(3):
    train,test = LSTM_model(df_log[stock_names[i]],test_size = len_test, WINDOW = WINDOW,scale = 1)
    df_lstm[stock_names[i]] = df_log.iloc[WINDOW:,i]
    df_lstm[stock_names[i]+'_Predict'] = np.concatenate((train,test))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Epoch 1/1
 - 87s - loss: 8.8086e-04


ValueError: Length of values does not match length of index

# Stock plots after 2018

In [None]:
fig, axs = plt.subplots(3,1, figsize=(15, 10), facecolor='w', edgecolor='k',sharex = True)
for i in range(3):
    df_lstm.loc['2018-01-01':,stock_names[i]].plot(ax = axs[i], color = 'blue')
    df_lstm.loc['2018-01-01':'2019-02-01',stock_names[i]+'_Predict'].plot(ax = axs[i], color = 'green')
    df_lstm.loc['2019-02-01':,stock_names[i]+'_Predict'].plot(ax = axs[i], color='red',marker = '*',markersize = '2')
    
    axs[2].set_xlabel('Date',size = '15')
    axs[i].set_ylabel(r'$log(P_{close} ,\$ $)', size = '15')
    axs[i].legend(['Original','Fitted','Prediction'],loc = 'upper left')
    axs[i].set_title(stock_names[i], size = '15')
    axs[i].set_xlim(['2018-01-01','2019-02-23'])    
    axs[0].set_ylim([4.9,5.5])
    axs[1].set_ylim([5.3,6.1])
    axs[2].set_ylim([6.8,7.3])   
    axs[i].grid()
plt.subplots_adjust(wspace = 0.04, hspace = 0.2)
plt.show()

In [None]:
df_lstm.tail()

# Stock plots after 2019

In [None]:
fig, axs = plt.subplots(3,1, figsize=(15, 10), facecolor='w', edgecolor='k',sharex = True)
for i in range(3):
    df_lstm.loc['2019-01-01':,stock_names[i]].plot(ax = axs[i], color = 'blue', marker = 'o')
    df_lstm.loc['2019-01-01':'2019-02-01',stock_names[i]+'_Predict'].plot(ax = axs[i], color = 'green', marker = '*')
    df_lstm.loc['2019-02-01':,stock_names[i]+'_Predict'].plot(ax = axs[i], color='red', marker = '*')
    mape = MAPE(df_lstm.loc['2019-02-01':,stock_names[i]],df_lstm.loc['2019-02-01':,stock_names[i]+'_Predict'])
    axs[i].set_title('{} : Mean Absolute Precentage Error = {}%'.format(stock_names[i],np.around(mape,2)))
    axs[i].grid()
    axs[2].set_xlabel('Date',size = '15')
    axs[i].set_ylabel(r'$log(P_{close} ,\$ $)', size = '15')
    axs[i].legend(['Original','Fitted','Prediction'],loc = 'upper left')
    axs[0].set_ylim([4.9,5.3])
    axs[1].set_ylim([5.5,6.0])
    axs[2].set_ylim([6.9,7.1])
    axs[i].set_xlim(['2019-01-01','2019-02-23'])
plt.show()