In [None]:
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Time-series forecasting with Deep Learning
Based on [How to use Keras TimeseriesGenerator for time series data](https://www.dlology.com/blog/how-to-use-keras-timeseriesgenerator-for-time-series-data/)

#### Installation
`mpld3` enables zooming and interactive data visualization for matplotlib figures in Jupyter notebooks.

Install it with `pip install mpld3`

Also install `pydot` with `pip install pydot` (used by `plot_model` to draw the model diagram).

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# mpld3 adds zooming and interactive behaviour to matplotlib figures in the notebook.
import mpld3
mpld3.enable_notebook()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

#### Function definitions

In [None]:
def delta_time_series(data):
    """Return the differences between consecutive entries of `data`.

    Entry ``i`` of the result is ``data[i + 1] - data[i]`` along the first
    axis, so the result has one entry fewer than the input.
    """
    later = data[1:]
    earlier = data[:-1]
    return later - earlier

def plot_delta(data):
    """Plot the consecutive differences of `data` and show the figure.

    The differences come from `delta_time_series`; the y-axis is labelled
    'close'.
    """
    deltas = delta_time_series(data)
    plt.plot(deltas)
    plt.ylabel('close')
    plt.show()
    
# get all targets y from a TimeseriesGenerator instance.
def get_y_from_generator(gen):
    """Collect the targets of every batch in `gen` into a single column.

    Args:
        gen: A sized, indexable sequence of batches (for example a Keras
            ``TimeseriesGenerator``) where ``gen[i][1]`` holds the targets
            of batch ``i``.

    Returns:
        A NumPy array of shape ``(n_targets, 1)`` holding the targets of all
        batches in order. A generator with no batches yields an array of
        shape ``(0, 1)`` instead of failing.

    Side effects:
        Prints the shape of the returned array.
    """
    # Flatten each batch and join everything once at the end; appending
    # inside the loop would re-copy the accumulated array on every batch.
    batches = [np.asarray(gen[i][1]).ravel() for i in range(len(gen))]
    if batches:
        y = np.concatenate(batches)
    else:
        # No batches at all: return an empty column rather than calling
        # reshape on None.
        y = np.empty((0,))
    y = y.reshape((-1, 1))
    print(y.shape)
    return y

# compute the match score of two binary numpy arrays.
def binary_accuracy(a, b):
    """Return the fraction of positions at which `a` and `b` are equal.

    Both inputs must have the same length; an AssertionError is raised
    otherwise. The count of equal elements is divided by ``len(a)``.
    """
    total = len(a)
    assert total == len(b)
    matches = (a == b).sum()
    return matches / total

#### Load data
DJI, AAPL, and AMZN closing index/price for the period 2015/09/19–2018/09/18.
- Source: [Yahoo Finance](https://finance.yahoo.com/quote/AMZN/history?period1=1442592000&period2=1537286400&interval=1d&filter=history&frequency=1d).

In [None]:
# Load a single column (index 4) of the DJI CSV as a float32 array.
# presumably column 4 is the closing value described above — TODO confirm against the CSV header
dataset_DJI = read_csv('./data/DJI_20150919-20180918.csv', usecols=[4]).values.astype('float32')
# Differences between consecutive rows; one row shorter than dataset_DJI.
dataset_delta_DJI = delta_time_series(dataset_DJI)

# The rest of the notebook works on the differenced series, not the raw values.
dataset = dataset_delta_DJI
print(dataset.shape)
# Plot the raw (undifferenced) series for reference.
plt.plot(dataset_DJI)
# Uncomment to plot the differenced series instead.
#plot_delta(dataset)


## Single time series as input
#### Data normalization

In [None]:
# Scale the series using statistics from the training portion only.
# Fitting the scaler on the whole series would let the minimum and maximum of
# the held-out test rows influence the training data (data leakage).
# The 0.67 fraction must match the train/test split performed below.
# Test rows may therefore fall slightly outside the (0, 1) range.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:int(len(dataset) * 0.67)])
dataset = scaler.transform(dataset)

#### Data splitting and transformation

In [None]:
# Chronological split: the first 67% of the rows are used for training and
# the remaining rows are held out for testing.
train_size = int(0.67 * len(dataset))
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]

# Each sample is a window of `look_back` consecutive values; the series is
# passed as both data and targets.
look_back = 3
train_data_gen = TimeseriesGenerator(
    data=train,
    targets=train,
    length=look_back,
    sampling_rate=1,
    stride=1,
    batch_size=3,
)
test_data_gen = TimeseriesGenerator(
    data=test,
    targets=test,
    length=look_back,
    sampling_rate=1,
    stride=1,
    batch_size=1,
)

#### Training

In [None]:
from IPython.display import Image
from keras.utils import plot_model

# An LSTM layer with 4 units reading windows of `look_back` steps with one
# feature each, followed by a single-output dense layer.
model = Sequential([
    LSTM(4, input_shape=(look_back, 1)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Write the model diagram to model.png and display it as the cell output.
plot_model(model, to_file='model.png', show_shapes=True)
Image(filename='model.png')

In [None]:
# Train on the batches from train_data_gen for 100 epochs and keep the
# `.history` attribute of the returned object in `history`.
# NOTE(review): fit_generator is deprecated in recent Keras/TensorFlow releases
# in favour of fit — confirm against the installed version.
history = model.fit_generator(train_data_gen, epochs=100).history

In [None]:
# Evaluate the trained model on the held-out test batches. The model was
# compiled with a mean-squared-error loss, computed here on the scaled data.
model.evaluate_generator(test_data_gen)

In [None]:
# Predict on the training windows; the bare `.shape` expression shows the
# shape of the prediction array as the cell output.
trainPredict = model.predict_generator(train_data_gen)
trainPredict.shape

In [None]:
# Predict on the test windows; the bare `.shape` expression shows the shape
# of the prediction array as the cell output.
testPredict = model.predict_generator(test_data_gen)
testPredict.shape

#### Invert transformation

In [None]:
# Map the predictions from the scaler's range back to the original units.
# NOTE: the variables are overwritten in place, so re-running only this cell
# would apply the inverse transform a second time.
trainPredict = scaler.inverse_transform(trainPredict)
testPredict = scaler.inverse_transform(testPredict)

In [None]:
# Gather the (still scaled) targets of every batch into one column per split.
# get_y_from_generator also prints the shape of each result.
trainY = get_y_from_generator(train_data_gen)
testY = get_y_from_generator(test_data_gen)

In [None]:
# Map the targets back to the original units so they are comparable with the
# inverse-transformed predictions.
# NOTE: the variables are overwritten in place, so re-running only this cell
# would apply the inverse transform a second time.
trainY = scaler.inverse_transform(trainY)
testY = scaler.inverse_transform(testY)

#### Model evaluation

In [None]:
# RMSE
trainScore = math.sqrt(mean_squared_error(trainY[:,0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[:, 0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

#### Invert transformation

In [None]:
# Bring the full (scaled) series back to the original units for plotting.
# NOTE: `dataset` is overwritten in place, so re-running only this cell would
# apply the inverse transform a second time.
dataset = scaler.inverse_transform(dataset)
dataset.shape

#### Plotting

In [None]:
# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
trainPredictPlot = trainPredictPlot + dataset_DJI[1:]

# shift test predictions for plotting
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(look_back*2):len(dataset), :] = testPredict
testPredictPlot = testPredictPlot + dataset_DJI[1:]

In [None]:
# plot baseline and predictions
# Baseline: the differenced series plus dataset_DJI[1:], i.e. the same offset
# that was added to the prediction arrays, followed by the train and test
# prediction curves.
plt.plot(dataset + dataset_DJI[1:])
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()

In [None]:
# plot baseline and predictions
# Baseline: the differenced series plus dataset_DJI[:-1]. Because the
# differences are dataset_DJI[1:] - dataset_DJI[:-1], this reconstructs
# dataset_DJI[1:] up to rounding from scaling.
# NOTE(review): the prediction arrays were offset by dataset_DJI[1:], not
# dataset_DJI[:-1], so they do not share this baseline's offset — confirm
# which one is intended.
plt.plot(dataset + dataset_DJI[:-1])
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()

## Multiple time series as input

Left as homework (DÚ) :)