In [153]:
from __future__ import absolute_import, division, print_function, unicode_literals

try:
    %tensorflow_version 2.x
except Exception:
    pass

import tensorflow as tf
import numpy as np
import pandas as pd
import pandas_datareader.data as web
from pandas import Series, DataFrame

import plotly
import plotly.figure_factory as ff
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import sklearn
from sklearn.model_selection import TimeSeriesSplit
from sklearn import preprocessing

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

import os
import json
import datetime

from keras.models import Sequential
from keras.layers import LSTM,Dropout,Dense

from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from sklearn.utils import check_array
from sklearn.metrics import mean_squared_error as mse
from math import sqrt

In [154]:
start = datetime.datetime(2010,1,1)
end = datetime.datetime(2019,1,1)
df = web.DataReader('GOOG','yahoo',start,end)
df2 = web.DataReader('AAPL','yahoo', start,end)

df.to_csv("google_stock_prices.csv")
df2.to_csv("apple_stock_prices.csv")

In [155]:
df=pd.read_csv("google_stock_prices.csv")

data_google = df[['Date', 'Open', 'Low', 'High', 'Close']]
data_google['Date'] = pd.to_datetime(data_google['Date'], utc=False)
data_google= data_google.sort_values('Date')
data_google.to_csv("google_stock_prices.csv")
data_google.head()

Unnamed: 0,Date,Open,Low,High,Close
0,2010-01-04,312.304413,310.954468,313.57962,312.204773
1,2010-01-05,312.418976,309.609497,312.747742,310.829926
2,2010-01-06,311.761444,302.047852,311.761444,302.994293
3,2010-01-07,303.562164,295.218445,303.861053,295.940735
4,2010-01-08,294.894653,293.455048,300.498657,299.885956


In [156]:
df2=pd.read_csv("apple_stock_prices.csv")

data_apple = df2[['Date', 'Open', 'Low', 'High', 'Close']]
data_apple['Date'] = pd.to_datetime(data_apple['Date'], utc=False)
data_apple = data_apple.sort_values('Date')
data_apple.to_csv("apple_stock_prices.csv")
data_apple.head()

Unnamed: 0,Date,Open,Low,High,Close
0,2010-01-04,30.49,30.34,30.642857,30.572857
1,2010-01-05,30.657143,30.464285,30.798571,30.625713
2,2010-01-06,30.625713,30.107143,30.747143,30.138571
3,2010-01-07,30.25,29.864286,30.285715,30.082857
4,2010-01-08,30.042856,29.865715,30.285715,30.282858


In [157]:
#extracting the columns we want 

# feature_google = data_google.columns[4]
# target_google = data_google.columns[0]

#Preprosessing Google Data
stock_data_g= data_google[['Date'] + ['Close']]
scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
scaler_data = scaler.fit_transform(stock_data_g['Close'].values.reshape(-1, 1)).tolist()
scaler_data = pd.DataFrame(scaler_data, columns=['Close'])
stock_data_g = pd.DataFrame(pd.concat([scaler_data, stock_data_g['Date']], axis=1), columns=stock_data_g.columns)
stock_data_g.head(10)


Unnamed: 0,Date,Close
0,2010-01-04,0.090365
1,2010-01-05,0.089057
2,2010-01-06,0.081603
3,2010-01-07,0.074892
4,2010-01-08,0.078646
5,2010-01-11,0.078214
6,2010-01-12,0.073177
7,2010-01-13,0.07157
8,2010-01-14,0.072878
9,2010-01-15,0.06821


In [158]:
# feature_apple = data_apple.columns[4]
# target_apple = data_apple.columns[0]

#Preprosessing Apple Data
stock_data_a = data_apple[['Date'] + ['Close']]
scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
scaler_data = scaler.fit_transform(stock_data_a['Close'].values.reshape(-1, 1)).tolist()
scaler_data = pd.DataFrame(scaler_data, columns=['Close'])
stock_data_a = pd.DataFrame(pd.concat([scaler_data, stock_data_a['Date']], axis=1), columns=stock_data_a.columns)
stock_data_a.head(10)

Unnamed: 0,Date,Close
0,2010-01-04,0.01533
1,2010-01-05,0.015589
2,2010-01-06,0.013208
3,2010-01-07,0.012936
4,2010-01-08,0.013913
5,2010-01-11,0.012608
6,2010-01-12,0.010939
7,2010-01-13,0.012985
8,2010-01-14,0.012133
9,2010-01-15,0.00969


In [159]:
# Let's plot the data
trace0 = go.Scatter(
    x=stock_data_g['Date'],
    y=stock_data_g['Close'],
    name='Google',
    line=dict(
        color='rgb(205, 12, 24)',
        width=2)
)

trace1 = go.Scatter(
    x=stock_data_a['Date'],
    y=stock_data_a['Close'],
    name='Apple',
    mode='lines',
    line=dict(
        color='rgb(22, 96, 167)',
        width=2)
)

fig =make_subplots(rows=2, cols=1)

fig.append_trace(trace0, 1, 1)
fig.append_trace(trace1, 2, 1)

fig['layout'].update(height=600, width=800, title='Google vs Apple Stock')
fig.show()

In [160]:
def split_sequence(data, ts_col, x_col, n_steps=3, n_outs=1):
    #We want to create a 
    sequence = data[x_col].values
    ts = data.loc[n_steps:len(sequence)-n_outs, ts_col].values

    X, Y = [], []

    for i in range(len(sequence)):
        end_ix = i + n_steps

        # Extract the features and label
        if end_ix > (len(sequence) - n_outs):
            break
        seq_x = sequence[i: end_ix]
        
        #If we want more than one feature to be considered a target(2 for example) 
        # here we want the last feature of the sequence to be taken as target
        if n_outs > 1:
            seq_y = np.array(sequence[end_ix: end_ix + n_outs])
        else:
            seq_y = np.array(sequence[end_ix])
        
        X.append(seq_x)
        Y.append(seq_y)
        
    
    X = np.array(X)
    Y = np.array(Y)
    
    if n_outs < 2:
        Y = np.expand_dims(Y, -1)

    columns = ['%s (t-%d)' % (x_col, i) for i in range(n_steps, 0, -1)]

    if n_outs > 1:
        columns = columns + ['%s (t)' % (x_col) if i == 0 else '%s (t+%d)' % (x_col, i) for i in range(0, n_outs)]
    else:
        columns = columns + ['%s (t)' % (x_col)]


    _data = pd.DataFrame(np.concatenate([X, Y], axis=1), columns=columns)
    _data[ts_col] = ts

    _data = _data[[ts_col] + columns]
    _data = _data.dropna()
    _data = _data.reset_index(drop=True)


    return _data

In [161]:
# Making Data Sequence of Apple
sequence_len = 30
n_outs = 1
feature_apple = data_apple.columns[4]
target_apple = data_apple.columns[0]
new_data_apple = split_sequence(stock_data_a, ts_col=target_apple , x_col=feature_apple, n_steps=sequence_len, n_outs=n_outs)
new_data_apple.head(10)

Unnamed: 0,Date,Close (t-30),Close (t-29),Close (t-28),Close (t-27),Close (t-26),Close (t-25),Close (t-24),Close (t-23),Close (t-22),Close (t-21),Close (t-20),Close (t-19),Close (t-18),Close (t-17),Close (t-16),Close (t-15),Close (t-14),Close (t-13),Close (t-12),Close (t-11),Close (t-10),Close (t-9),Close (t-8),Close (t-7),Close (t-6),Close (t-5),Close (t-4),Close (t-3),Close (t-2),Close (t-1),Close (t)
0,2010-02-17,0.01533,0.015589,0.013208,0.012936,0.013913,0.012608,0.010939,0.012985,0.012133,0.00969,0.01605,0.013739,0.011184,0.003979,0.007693,0.009697,0.011051,0.005054,7e-06,0.001871,0.00266,0.005012,0.0,0.002381,0.001445,0.00289,0.002143,0.004621,0.005815,0.007924,0.00733
1,2010-02-18,0.015589,0.013208,0.012936,0.013913,0.012608,0.010939,0.012985,0.012133,0.00969,0.01605,0.013739,0.011184,0.003979,0.007693,0.009697,0.011051,0.005054,7e-06,0.001871,0.00266,0.005012,0.0,0.002381,0.001445,0.00289,0.002143,0.004621,0.005815,0.007924,0.00733,0.007595
2,2010-02-19,0.013208,0.012936,0.013913,0.012608,0.010939,0.012985,0.012133,0.00969,0.01605,0.013739,0.011184,0.003979,0.007693,0.009697,0.011051,0.005054,7e-06,0.001871,0.00266,0.005012,0.0,0.002381,0.001445,0.00289,0.002143,0.004621,0.005815,0.007924,0.00733,0.007595,0.006716
3,2010-02-22,0.012936,0.013913,0.012608,0.010939,0.012985,0.012133,0.00969,0.01605,0.013739,0.011184,0.003979,0.007693,0.009697,0.011051,0.005054,7e-06,0.001871,0.00266,0.005012,0.0,0.002381,0.001445,0.00289,0.002143,0.004621,0.005815,0.007924,0.00733,0.007595,0.006716,0.005843
4,2010-02-23,0.013913,0.012608,0.010939,0.012985,0.012133,0.00969,0.01605,0.013739,0.011184,0.003979,0.007693,0.009697,0.011051,0.005054,7e-06,0.001871,0.00266,0.005012,0.0,0.002381,0.001445,0.00289,0.002143,0.004621,0.005815,0.007924,0.00733,0.007595,0.006716,0.005843,0.003498
5,2010-02-24,0.012608,0.010939,0.012985,0.012133,0.00969,0.01605,0.013739,0.011184,0.003979,0.007693,0.009697,0.011051,0.005054,7e-06,0.001871,0.00266,0.005012,0.0,0.002381,0.001445,0.00289,0.002143,0.004621,0.005815,0.007924,0.00733,0.007595,0.006716,0.005843,0.003498,0.006011
6,2010-02-25,0.010939,0.012985,0.012133,0.00969,0.01605,0.013739,0.011184,0.003979,0.007693,0.009697,0.011051,0.005054,7e-06,0.001871,0.00266,0.005012,0.0,0.002381,0.001445,0.00289,0.002143,0.004621,0.005815,0.007924,0.00733,0.007595,0.006716,0.005843,0.003498,0.006011,0.006946
7,2010-02-26,0.012985,0.012133,0.00969,0.01605,0.013739,0.011184,0.003979,0.007693,0.009697,0.011051,0.005054,7e-06,0.001871,0.00266,0.005012,0.0,0.002381,0.001445,0.00289,0.002143,0.004621,0.005815,0.007924,0.00733,0.007595,0.006716,0.005843,0.003498,0.006011,0.006946,0.008775
8,2010-03-01,0.012133,0.00969,0.01605,0.013739,0.011184,0.003979,0.007693,0.009697,0.011051,0.005054,7e-06,0.001871,0.00266,0.005012,0.0,0.002381,0.001445,0.00289,0.002143,0.004621,0.005815,0.007924,0.00733,0.007595,0.006716,0.005843,0.003498,0.006011,0.006946,0.008775,0.011826
9,2010-03-02,0.00969,0.01605,0.013739,0.011184,0.003979,0.007693,0.009697,0.011051,0.005054,7e-06,0.001871,0.00266,0.005012,0.0,0.002381,0.001445,0.00289,0.002143,0.004621,0.005815,0.007924,0.00733,0.007595,0.006716,0.005843,0.003498,0.006011,0.006946,0.008775,0.011826,0.011728


In [162]:
# Making Data Sequence of Google

sequence_len = 30
n_outs = 1
feature_google = data_google.columns[4]
target_google = data_google.columns[0]
new_data_google = split_sequence(stock_data_g, ts_col=target_google , x_col=feature_google, n_steps=sequence_len, n_outs=n_outs)
new_data_google.head(10)

Unnamed: 0,Date,Close (t-30),Close (t-29),Close (t-28),Close (t-27),Close (t-26),Close (t-25),Close (t-24),Close (t-23),Close (t-22),Close (t-21),Close (t-20),Close (t-19),Close (t-18),Close (t-17),Close (t-16),Close (t-15),Close (t-14),Close (t-13),Close (t-12),Close (t-11),Close (t-10),Close (t-9),Close (t-8),Close (t-7),Close (t-6),Close (t-5),Close (t-4),Close (t-3),Close (t-2),Close (t-1),Close (t)
0,2010-02-17,0.090365,0.089057,0.081603,0.074892,0.078646,0.078214,0.073177,0.07157,0.072878,0.06821,0.071821,0.068404,0.069622,0.053997,0.049254,0.0504,0.050249,0.046548,0.044486,0.045946,0.045045,0.049642,0.042989,0.045126,0.046159,0.047567,0.046623,0.047548,0.045993,0.04987,0.048405
1,2010-02-18,0.089057,0.081603,0.074892,0.078646,0.078214,0.073177,0.07157,0.072878,0.06821,0.071821,0.068404,0.069622,0.053997,0.049254,0.0504,0.050249,0.046548,0.044486,0.045946,0.045045,0.049642,0.042989,0.045126,0.046159,0.047567,0.046623,0.047548,0.045993,0.04987,0.048405,0.05078
2,2010-02-19,0.081603,0.074892,0.078646,0.078214,0.073177,0.07157,0.072878,0.06821,0.071821,0.068404,0.069622,0.053997,0.049254,0.0504,0.050249,0.046548,0.044486,0.045946,0.045045,0.049642,0.042989,0.045126,0.046159,0.047567,0.046623,0.047548,0.045993,0.04987,0.048405,0.05078,0.049614
3,2010-02-22,0.074892,0.078646,0.078214,0.073177,0.07157,0.072878,0.06821,0.071821,0.068404,0.069622,0.053997,0.049254,0.0504,0.050249,0.046548,0.044486,0.045946,0.045045,0.049642,0.042989,0.045126,0.046159,0.047567,0.046623,0.047548,0.045993,0.04987,0.048405,0.05078,0.049614,0.050581
4,2010-02-23,0.078646,0.078214,0.073177,0.07157,0.072878,0.06821,0.071821,0.068404,0.069622,0.053997,0.049254,0.0504,0.050249,0.046548,0.044486,0.045946,0.045045,0.049642,0.042989,0.045126,0.046159,0.047567,0.046623,0.047548,0.045993,0.04987,0.048405,0.05078,0.049614,0.050581,0.046917
5,2010-02-24,0.078214,0.073177,0.07157,0.072878,0.06821,0.071821,0.068404,0.069622,0.053997,0.049254,0.0504,0.050249,0.046548,0.044486,0.045946,0.045045,0.049642,0.042989,0.045126,0.046159,0.047567,0.046623,0.047548,0.045993,0.04987,0.048405,0.05078,0.049614,0.050581,0.046917,0.045211
6,2010-02-25,0.073177,0.07157,0.072878,0.06821,0.071821,0.068404,0.069622,0.053997,0.049254,0.0504,0.050249,0.046548,0.044486,0.045946,0.045045,0.049642,0.042989,0.045126,0.046159,0.047567,0.046623,0.047548,0.045993,0.04987,0.048405,0.05078,0.049614,0.050581,0.046917,0.045211,0.042823
7,2010-02-26,0.07157,0.072878,0.06821,0.071821,0.068404,0.069622,0.053997,0.049254,0.0504,0.050249,0.046548,0.044486,0.045946,0.045045,0.049642,0.042989,0.045126,0.046159,0.047567,0.046623,0.047548,0.045993,0.04987,0.048405,0.05078,0.049614,0.050581,0.046917,0.045211,0.042823,0.042998
8,2010-03-01,0.072878,0.06821,0.071821,0.068404,0.069622,0.053997,0.049254,0.0504,0.050249,0.046548,0.044486,0.045946,0.045045,0.049642,0.042989,0.045126,0.046159,0.047567,0.046623,0.047548,0.045993,0.04987,0.048405,0.05078,0.049614,0.050581,0.046917,0.045211,0.042823,0.042998,0.045789
9,2010-03-02,0.06821,0.071821,0.068404,0.069622,0.053997,0.049254,0.0504,0.050249,0.046548,0.044486,0.045946,0.045045,0.049642,0.042989,0.045126,0.046159,0.047567,0.046623,0.047548,0.045993,0.04987,0.048405,0.05078,0.049614,0.050581,0.046917,0.045211,0.042823,0.042998,0.045789,0.049756


In [163]:
print('******** Google Dataset Info **********')

x_cols_google= new_data_google.columns[1:-n_outs]
y_cols_google = new_data_google.columns[-n_outs:]
print('Features: \n%s' % str(list(x_cols_google)))
print('Target: \n%s' % str(list(y_cols_google)))


print('******** Apple Dataset Info **********')

x_cols_apple= new_data_apple.columns[1:-n_outs]
y_cols_apple = new_data_apple.columns[-n_outs:]
print('Features: \n%s' % str(list(x_cols_apple)))
print('Target: \n%s' % str(list(y_cols_apple)))

******** Google Dataset Info **********
Features: 
['Close (t-30)', 'Close (t-29)', 'Close (t-28)', 'Close (t-27)', 'Close (t-26)', 'Close (t-25)', 'Close (t-24)', 'Close (t-23)', 'Close (t-22)', 'Close (t-21)', 'Close (t-20)', 'Close (t-19)', 'Close (t-18)', 'Close (t-17)', 'Close (t-16)', 'Close (t-15)', 'Close (t-14)', 'Close (t-13)', 'Close (t-12)', 'Close (t-11)', 'Close (t-10)', 'Close (t-9)', 'Close (t-8)', 'Close (t-7)', 'Close (t-6)', 'Close (t-5)', 'Close (t-4)', 'Close (t-3)', 'Close (t-2)', 'Close (t-1)']
Target: 
['Close (t)']
******** Apple Dataset Info **********
Features: 
['Close (t-30)', 'Close (t-29)', 'Close (t-28)', 'Close (t-27)', 'Close (t-26)', 'Close (t-25)', 'Close (t-24)', 'Close (t-23)', 'Close (t-22)', 'Close (t-21)', 'Close (t-20)', 'Close (t-19)', 'Close (t-18)', 'Close (t-17)', 'Close (t-16)', 'Close (t-15)', 'Close (t-14)', 'Close (t-13)', 'Close (t-12)', 'Close (t-11)', 'Close (t-10)', 'Close (t-9)', 'Close (t-8)', 'Close (t-7)', 'Close (t-6)', 'Close 

In [164]:
new_data_google[new_data_google.columns[1:]] = new_data_google[new_data_google.columns[1:]].astype('float32')
new_data_apple[new_data_apple.columns[1:]] = new_data_apple[new_data_apple.columns[1:]].astype('float32')

In [165]:
def timeseries_split(data, ts_col, valid_step=100, valid_range=5, test_size=0.2):
    _data = data.copy()
    _data[ts_col] = pd.to_datetime(_data[ts_col], utc=False)

    train_test_splitter = len(_data) - int(len(_data) * test_size)

    _train_data = _data.iloc[:train_test_splitter]
    test_data = _data.iloc[train_test_splitter:]

    train_rngs = [[i, i + valid_step] for i in list(range(valid_step, len(_train_data), valid_step))]

    valid_rngs = train_rngs[5:][::valid_range]
    valid_data = []
    for valid_rng in valid_rngs:
        train_rngs.pop(train_rngs.index(valid_rng))
        valid_data.append(_train_data.iloc[valid_rng[0]: valid_rng[1]])
    
    train_data = []
    for train_rng in train_rngs:
        train_data.append(_train_data.iloc[train_rng[0]: train_rng[1]])

    train_data = pd.concat(train_data, 0)
    valid_data = pd.concat(valid_data, 0)
    

    train_data = train_data.reset_index(drop=True)
    valid_data = valid_data.reset_index(drop=True)
    test_data = test_data.reset_index(drop=True)

    return train_data, valid_data, test_data
    

In [166]:
# Making Train and Test Set for Apple Stock

train_data_apple, valid_data_apple, test_data_apple = timeseries_split(new_data_apple, target_apple, valid_step=35, valid_range=3, test_size=0.25)

x_train_apple, y_train_apple = train_data_apple.loc[:, x_cols_apple].values, train_data_apple.loc[:, y_cols_apple].values
x_valid_apple, y_valid_apple = valid_data_apple.loc[:, x_cols_apple].values, valid_data_apple.loc[:, y_cols_apple].values
x_test_apple, y_test_apple = test_data_apple.loc[:, x_cols_apple].values, test_data_apple.loc[:, y_cols_apple].values

x_train_apple = np.expand_dims(x_train_apple, -1)
x_valid_apple = np.expand_dims(x_valid_apple, -1)
x_test_apple = np.expand_dims(x_test_apple, -1)

x_train_apple = x_train_apple.astype(np.float32)
x_valid_apple = x_valid_apple.astype(np.float32)
x_test_apple = x_test_apple.astype(np.float32)

y_train_apple = y_train_apple.astype(np.float32)
y_valid_apple = y_valid_apple.astype(np.float32)
y_test_apple = y_test_apple.astype(np.float32)

print('Training %s %s' % (str(x_train_apple.shape), str(y_train_apple.shape)))
print('Validating %s %s' % (str(x_valid_apple.shape), str(y_valid_apple.shape)))
print('Testing %s %s' % (str(x_test_apple.shape), str(y_test_apple.shape)))

fig = make_subplots()

fig.add_trace(
    go.Scatter(x=train_data_apple['Date'], y=train_data_apple['Close (t)'], name='Close (t) Train'),
)

fig.add_trace(
    go.Scatter(x=valid_data_apple['Date'], y=valid_data_apple['Close (t)'], name='Close (t) Valid'),
)

fig.add_trace(
    go.Scatter(x=test_data_apple['Date'], y=test_data_apple['Close (t)'], name='Close (t) Test'),
)

fig.update_layout(
    title_text='Train/Valid/Test Split of Apple'
)

fig.show()

Training (1151, 30, 1) (1151, 1)
Validating (490, 30, 1) (490, 1)
Testing (558, 30, 1) (558, 1)


In [167]:
# Making Train and Test Set for Google Stock

train_data_google, valid_data_google, test_data_google = timeseries_split(new_data_google, target_google, valid_step=40, valid_range=3, test_size=0.25)

x_train_google, y_train_google = train_data_google.loc[:, x_cols_google].values, train_data_google.loc[:, y_cols_google].values
x_valid_google, y_valid_google = valid_data_google.loc[:, x_cols_google].values, valid_data_google.loc[:, y_cols_google].values
x_test_google, y_test_google = test_data_google.loc[:, x_cols_google].values, test_data_google.loc[:, y_cols_google].values

x_train_google = np.expand_dims(x_train_google, -1)
x_valid_google = np.expand_dims(x_valid_google, -1)
x_test_google = np.expand_dims(x_test_google, -1)

x_train_google = x_train_google.astype(np.float32)
x_valid_google = x_valid_google.astype(np.float32)
x_test_google = x_test_google.astype(np.float32)

y_train_google = y_train_google.astype(np.float32)
y_valid_google = y_valid_google.astype(np.float32)
y_test_google = y_test_google.astype(np.float32)

print('Training %s %s' % (str(x_train_google.shape), str(y_train_google.shape)))
print('Validating %s %s' % (str(x_valid_google.shape), str(y_valid_google.shape)))
print('Testing %s %s' % (str(x_test_google.shape), str(y_test_google.shape)))
fig = make_subplots()

fig.add_trace(
    go.Scatter(x=train_data_apple['Date'], y=train_data_apple['Close (t)'], name='Close (t) Train'),
)

fig.add_trace(
    go.Scatter(x=valid_data_apple['Date'], y=valid_data_apple['Close (t)'], name='Close (t) Valid'),
)

fig.add_trace(
    go.Scatter(x=test_data_apple['Date'], y=test_data_apple['Close (t)'], name='Close (t) Test'),
)

fig.update_layout(
    title_text='Train/Valid/Test Split of Apple'
)

fig.show()

Training (1156, 30, 1) (1156, 1)
Validating (480, 30, 1) (480, 1)
Testing (558, 30, 1) (558, 1)


In [168]:
def build_model(n_timesteps, n_features, n_outputs=1, rnn_units=None, dropout_rate=0.0, lr=0.001):
    rnn_units = rnn_units if isinstance(rnn_units, list) else [100, 100]

    inputs = tf.keras.layers.Input(shape=[n_timesteps, n_features], name='inputs')

    x = inputs

    for units in rnn_units[:-1]:
        x = tf.keras.layers.LSTM(units, return_sequences=True)(x)
        x = tf.keras.layers.Dropout(rate=dropout_rate)(x)
    
    x = tf.keras.layers.LSTM(rnn_units[-1], return_sequences=False)(x)
    x = tf.keras.layers.Dropout(rate=dropout_rate)(x)

    outputs = tf.keras.layers.Dense(n_outputs, activation='linear')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    
    opt = tf.keras.optimizers.Adam(lr=lr)
    model.compile(optimizer=opt, loss='mse')

    return model

In [169]:
def build_model_gru(n_timesteps, n_features, n_outputs=1, rnn_units=None, dropout_rate=0.0, lr=0.001):
    rnn_units = rnn_units if isinstance(rnn_units, list) else [100, 100]

    inputs = tf.keras.layers.Input(shape=[n_timesteps, n_features], name='inputs')

    x = inputs

    for units in rnn_units[:-1]:
        x = tf.keras.layers.GRU(units, return_sequences=True)(x)
        x = tf.keras.layers.Dropout(rate=dropout_rate)(x)
    
    x = tf.keras.layers.GRU(rnn_units[-1], return_sequences=False)(x)
    x = tf.keras.layers.Dropout(rate=dropout_rate)(x)

    outputs = tf.keras.layers.Dense(n_outputs,activation = 'linear')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    
    opt = tf.keras.optimizers.RMSprop(lr=lr,momentum=0.9)
    model.compile(optimizer=opt, loss='mse')

    return model

In [170]:
def build_model_simplernn(n_timesteps, n_features, n_outputs=1, rnn_units=None, dropout_rate=0.0, lr=0.001):
    rnn_units = rnn_units if isinstance(rnn_units, list) else [100, 100]

    inputs = tf.keras.layers.Input(shape=[n_timesteps, n_features], name='inputs')

    x = inputs

    for units in rnn_units[:-1]:
        x = tf.keras.layers.SimpleRNN(units, return_sequences=True)(x)
        x = tf.keras.layers.Dropout(rate=dropout_rate)(x)
    
    x = tf.keras.layers.SimpleRNN(rnn_units[-1], return_sequences=False)(x)
    x = tf.keras.layers.Dropout(rate=dropout_rate)(x)

    outputs = tf.keras.layers.Dense(n_outputs,activation = 'linear')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    
    opt = tf.keras.optimizers.Adam(lr=lr)
    model.compile(optimizer=opt, loss='mse')

    return model

In [171]:
print('** APPLE **')
[n_timesteps_apple, n_features_apple], n_outputs_apple = x_train_apple.shape[1:], y_train_apple.shape[1]
print('#%d Timesteps, #%d Features, #%d Target' % (n_timesteps_apple, n_features_apple, n_outputs_apple))

print('** GOOGLE **')
[n_timesteps_google, n_features_google], n_outputs_google = x_train_google.shape[1:], y_train_google.shape[1]
print('#%d Timesteps, #%d Features, #%d Target' % (n_timesteps_google, n_features_google, n_outputs_google))

** APPLE **
#30 Timesteps, #1 Features, #1 Target
** GOOGLE **
#30 Timesteps, #1 Features, #1 Target


In [172]:
print('*** APPLE ***')
model_1 = build_model_gru(n_timesteps_apple, n_features_apple, n_outputs=n_outputs_apple,
                    rnn_units=[200, 200, 100], dropout_rate=0.0, lr=0.001)
model_1.summary()

apple_fit = model_1.fit(
    x_train_apple, y_train_apple, 
    validation_data=(x_valid_apple, y_valid_apple),
    batch_size=128,
    verbose=1,
    epochs=250)


history_dict = apple_fit.history
# history_dict.keys()

loss = history_dict['loss']
val_loss = history_dict['val_loss']
epochs = list(range(1, len(loss) + 1))



fig = make_subplots()
fig.add_trace(
    go.Scatter(x=epochs, y=loss, name='Training Loss'),
)
fig.add_trace(
    go.Scatter(x=epochs, y=val_loss, name='Validation loss'),
)
fig.update_layout(
    title_text='Training/Validation Loss for Apple Stock'
)
fig.show()

*** APPLE ***
Model: "functional_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          [(None, 30, 1)]           0         
_________________________________________________________________
gru_24 (GRU)                 (None, 30, 200)           121800    
_________________________________________________________________
dropout_24 (Dropout)         (None, 30, 200)           0         
_________________________________________________________________
gru_25 (GRU)                 (None, 30, 200)           241200    
_________________________________________________________________
dropout_25 (Dropout)         (None, 30, 200)           0         
_________________________________________________________________
gru_26 (GRU)                 (None, 100)               90600     
_________________________________________________________________
dropout_26 (Dropout)         (None, 100

In [173]:
print('*** GOOGLE ***')
model_2 = build_model_gru(n_timesteps_google, n_features_google, n_outputs=n_outputs_google,
                    rnn_units=[200, 200, 100], dropout_rate=0.0, lr=0.001)
model_2.summary()

google_fit = model_2.fit(
    x_train_google, y_train_google, 
    validation_data=(x_valid_google, y_valid_google),
    batch_size=128,
    verbose=1,
    epochs=250)


history_dict = google_fit.history
# history_dict.keys()

loss = history_dict['loss']
val_loss = history_dict['val_loss']
epochs = list(range(1, len(loss) + 1))



fig = make_subplots()
fig.add_trace(
    go.Scatter(x=epochs, y=loss, name='Training Loss'),
)
fig.add_trace(
    go.Scatter(x=epochs, y=val_loss, name='Validation loss'),
)
fig.update_layout(
    title_text='Training/Validation Loss for Google Stock'
)
fig.show()

*** GOOGLE ***
Model: "functional_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          [(None, 30, 1)]           0         
_________________________________________________________________
gru_27 (GRU)                 (None, 30, 200)           121800    
_________________________________________________________________
dropout_27 (Dropout)         (None, 30, 200)           0         
_________________________________________________________________
gru_28 (GRU)                 (None, 30, 200)           241200    
_________________________________________________________________
dropout_28 (Dropout)         (None, 30, 200)           0         
_________________________________________________________________
gru_29 (GRU)                 (None, 100)               90600     
_________________________________________________________________
dropout_29 (Dropout)         (None, 10

In [174]:
#Accuracy

loss_apple= model_1.evaluate(x_train_apple, y_train_apple, verbose=1)
accuracy = 1 - loss_apple
print("accuracy of apple prediction is:", accuracy)

loss_google= model_2.evaluate(x_train_google, y_train_google, verbose=1)
accuracy = 1 - loss_google
print("accuracy of google prediction is:" ,accuracy)

accuracy of apple prediction is: 0.9999448938942805
accuracy of google prediction is: 0.9998734090913786


In [175]:
#Accuracy

loss_apple= model_1.evaluate(x_valid_apple, y_valid_apple, verbose=1)
accuracy = 1 - loss_apple
print("Validation accuracy of apple prediction is:", accuracy)

loss_google= model_2.evaluate(x_valid_google, y_valid_google, verbose=1)
accuracy = 1 - loss_google
print("Validation accuracy of google prediction is:" ,accuracy)

Validation accuracy of apple prediction is: 0.9999282743447111
Validation accuracy of google prediction is: 0.9999125299291336


In [176]:
def prediction_point_by_point(model, x):
    predicted = model.predict(x)
    return predicted

In [177]:
xx_google = x_test_google
yy_google = y_test_google
tt_google = test_data_google

if len(yy_google.shape) > 1:
    yy_google = scaler.inverse_transform(yy_google)
else:
    yy_google = scaler.inverse_transform(yy_google.reshape(-1, 1)).flatten()
    
pp_google = prediction_point_by_point(model_2, xx_google)

if len(pp_google.shape) > 1:
    pp_google = scaler.inverse_transform(pp_google)
else:
    pp_google = scaler.inverse_transform(pp_google.reshape(-1, 1)).flatten()


xx_apple = x_test_apple
yy_apple = y_test_apple
tt_apple = test_data_apple

if len(yy_apple.shape) > 1:
    yy_apple = scaler.inverse_transform(yy_apple)
else:
    yy_apple = scaler.inverse_transform(yy_apple.reshape(-1, 1)).flatten()
    
pp_apple = prediction_point_by_point(model_1, xx_apple)

if len(pp_apple.shape) > 1:
    pp_apple = scaler.inverse_transform(pp_apple)
else:
    pp_apple = scaler.inverse_transform(pp_apple.reshape(-1, 1)).flatten()

In [178]:
def error(y_true,y_pred):
  check_array(y_true, y_pred)
  mse_error = mse(y_true,y_pred)
  rmse_error = sqrt(mse_error)
  mape_error = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return 'MSE_ERROR: %.2f\nRMSE_ERROR: %.2f\nMAPE_ERROR: %.2f' % (mse_error,rmse_error,mape_error)

print('**** APPLE ERROR SUMMARY ***')
apple_error = error(pp_apple,yy_apple)
print(apple_error)
print('**** GOOGLE ERROR SUMMARY ***')
google_error = error(pp_google,yy_google)
print(google_error)

**** APPLE ERROR SUMMARY ***
MSE_ERROR: 16.49
RMSE_ERROR: 4.06
MAPE_ERROR: 1.93
**** GOOGLE ERROR SUMMARY ***
MSE_ERROR: 8.71
RMSE_ERROR: 2.95
MAPE_ERROR: 1.18


In [179]:
#compare the model performance in predicting the price

fig = make_subplots()

fig.add_trace(
    go.Scatter(x=tt_apple['Date'], y=yy_apple[:, 0], name='True Target Apple'),
)
fig.add_trace(
    go.Scatter(x=tt_apple['Date'], y=pp_apple[:, 0], name='Predicted Apple'),
)
fig.add_trace(
    go.Scatter(x=tt_google['Date'], y=yy_google[:, 0], name='True Target Google'),
)
fig.add_trace(
    go.Scatter(x=tt_google['Date'], y=pp_google[:, 0], name='Predicted Google'),
)

fig.update_layout(
    title_text='Point-By-Point Prediction'
)

fig.show()

In [180]:
def prediction_by_date(model, dt, date_time_str, x_cols, y_cols, scaler, tc='Date', tcf='%Y-%m-%d', freq=7):
    cdt = dt[dt[tc] == date_time_str].index
    cdt = list(cdt)

    if not len(cdt) > 0:
        print('There is no data with your specific datetime [%s]' % date_time_str)
        return False, None

    indices = [cdt[0] + i for i in range(freq)]
    cdt = dt.iloc[indices]
    
    ts = cdt[tc]
    tsf = ts.dt.strftime(tcf).values
    y = cdt[y_cols].values.flatten()
    x = cdt[x_cols].values[:1]
    p = []

    for i in range(freq):
        pp = model.predict(x)
        x = np.concatenate([x[:, 1:], pp], 1)
        p.append(pp[0][0])

    p = np.array(p)

    y = scaler.inverse_transform(y.reshape(-1, 1)).flatten()
    p = scaler.inverse_transform(p.reshape(-1, 1)).flatten()
    e = y - p

    for v1, v2, v3, v4 in zip(tsf, y, p, e):
        print('[%s], Target: %.5f, Predicted %.5f, Error: %f' % (v1, v2, v3, v4))

    data = {
        target_apple: ts,
        'Y': y,
        'P': p,
    }
    data = pd.DataFrame.from_dict(data)
    data = data.reset_index(drop=True)

    return True, data

In [181]:
trend_data = {}
dates = ['14 Nov 2017']
for date in dates:
    print('Start from %s' % date)
    s1, df1 = prediction_by_date(model_1, tt_apple, date, x_cols=x_cols_apple, y_cols=y_cols_apple, freq=2, scaler=scaler)
    if s1:
        trend_data[date] = df1
    print()

Start from 14 Nov 2017
[2017-11-14], Target: 171.34000, Predicted 171.20796, Error: 0.132034
[2017-11-15], Target: 169.08000, Predicted 168.95763, Error: 0.122375



In [182]:
fig = make_subplots()

fig.add_trace(
    go.Scatter(x=tt_apple['Date'], y=yy_apple[:, 0], name='True Target (t)', line=dict(width=2, color='#512b58')),
)
fig.add_trace(
    go.Scatter(x=tt_apple['Date'], y=pp_apple[:, 0], name='Predicted (t)', line=dict(width=2, color="#40bad5")),
)

for k, df in trend_data.items():
    fig.add_trace(
        go.Scatter(x=df1['Date'], y=df1['P'], name='Trend [%s]' % k, mode='lines', line=dict(width=6, color='#d8345f')),
    )

fig.update_layout(
    title_text='Trend Prediction'
)

fig.show()

In [183]:
trend_data = {}
dates = ['14 Nov 2017']
for date in dates:
    print('Start from %s' % date)
    s2, df2 = prediction_by_date(model_2, tt_google, date, x_cols=x_cols_google, y_cols=y_cols_google, freq=2, scaler=scaler)
    if s2:
        trend_data[date] = df2
    print()

Start from 14 Nov 2017
[2017-11-14], Target: 184.89221, Predicted 185.81593, Error: -0.923721
[2017-11-15], Target: 183.90126, Predicted 186.68044, Error: -2.779175



In [184]:
fig = make_subplots()

fig.add_trace(
    go.Scatter(x=tt_google['Date'], y=yy_google[:, 0], name='True Target (t)', line=dict(width=2, color='#512b58')),
)
fig.add_trace(
    go.Scatter(x=tt_google['Date'], y=pp_google[:, 0], name='Predicted (t)', line=dict(width=2, color="#40bad5")),
)

for k, df in trend_data.items():
    fig.add_trace(
        go.Scatter(x=df2['Date'], y=df2['P'], name='Trend [%s]' % k, mode='lines', line=dict(width=4, color='#d8345f')),
    )

fig.update_layout(
    title_text='Trend Prediction'
)

fig.show()

In [185]:
#Making Train and Test Set for Apple Stock

# cut_point = 2000
# cut_point_tv = cut_point - int(cut_point * 0.3)

# train_data_apple = new_data_apple.iloc[:cut_point_tv]
# valid_data_apple = new_data_apple.iloc[cut_point_tv:cut_point]
# test_data_apple = new_data_apple.iloc[cut_point:]

# x_train_apple, y_train_apple = train_data_apple.loc[:, x_cols_apple].values, train_data_apple.loc[:, y_cols_apple].values
# x_valid_apple, y_valid_apple = valid_data_apple.loc[:, x_cols_apple].values, valid_data_apple.loc[:, y_cols_apple].values
# x_test_apple, y_test_apple = test_data_apple.loc[:, x_cols_apple].values, test_data_apple.loc[:, y_cols_apple].values

# x_train_apple = np.expand_dims(x_train_apple, -1)
# x_valid_apple = np.expand_dims(x_valid_apple, -1)
# x_test_apple = np.expand_dims(x_test_apple, -1)

# x_train_apple = x_train_apple.astype(np.float32)
# x_valid_apple = x_valid_apple.astype(np.float32)
# x_test_apple = x_test_apple.astype(np.float32)

# y_train_apple = y_train_apple.astype(np.float32)
# y_valid_apple = y_valid_apple.astype(np.float32)
# y_test_apple = y_test_apple.astype(np.float32)

# print('Training %s %s' % (str(x_train_apple.shape), str(y_train_apple.shape)))
# print('Validating %s %s' % (str(x_valid_apple.shape), str(y_valid_apple.shape)))
# print('Testing %s %s' % (str(x_test_apple.shape), str(y_test_apple.shape)))

In [186]:
# fig = make_subplots()

# fig.add_trace(
#     go.Scatter(x=train_data_google['Date'], y=train_data_google['Close'], name='Close (t) Train'),
# )

# fig.add_trace(
#     go.Scatter(x=valid_data_google['Date'], y=valid_data_google['Close'], name='Close(t) Valid'),
# )

# fig.add_trace(
#     go.Scatter(x=test_data_google['Date'], y=test_data_google['Close'], name='Close (t) Test'),
# )

# fig.update_layout(
#     title_text='Train/Valid/Test Split Version 1.0'
# )

# fig.show()