In [2]:
import os
import json
import time
import numpy as np
import matplotlib.pyplot as plt
%matplotlib qt

from get_data import GetData
from data_loader import DataLoader
from pre_processing import PreProcessing

from keras.models import Sequential
from keras.layers import LSTM, Dense, RepeatVector, TimeDistributed

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

Using TensorFlow backend.


In [3]:
configs = json.load(open('config-test.json', 'r'))
configs

{'data': {'symbol': 'AMZN',
  'columns': ['Close'],
  'start': '2008-04-01',
  'end': '2019-10-01',
  'sequence_length': 5,
  'train_test_split': 0.85,
  'save_dir': 'data',
  'correlates_to': ['NOV',
   'EQIX',
   'CTAS',
   'CRM',
   'BSX',
   'IT',
   'V',
   'MSI',
   'CLX',
   'WAT']},
 'preprocessing': {'denoise': {'wavelet': 'haar', 'thr_mode': 'garotte'},
  'filename': 'preprocessed.csv',
  'save_dir': 'preprocessing'},
 'training': {'epochs': 2, 'batch_size': 32},
 'model': {'loss': 'mae',
  'optimizer': 'adam',
  'save_dir': 'model',
  'layers': [{'type': 'lstm',
    'neurons': 100,
    'activation': 'relu',
    'input_timesteps': 4,
    'input_dim': 1,
    'return_sequences': False},
   {'type': 'repeat_vector', 'output_shape': 1},
   {'type': 'lstm',
    'neurons': 100,
    'activation': 'relu',
    'return_sequences': True},
   {'type': 'time_distributed', 'layer': {'type': 'dense', 'neurons': 1}}]}}

In [4]:
dataloader = DataLoader(os.path.join(configs['data']['save_dir'], configs['data']['symbol'] + '.csv'),
                    configs['data']['train_test_split'],
                    configs['data']['columns'])

In [5]:
n_in = len(dataloader.data)
sequence_in = dataloader.data.reshape((1, n_in, 1))
sequence_in.shape

(1, 2818, 1)

In [6]:
# LSTM Autoencoder for reconstruct
timestemps = sequence_in.shape[1]
n_features = sequence_in.shape[2]

model = Sequential()
model.add(LSTM(128, activation='relu', input_shape=(timestemps, n_features), return_sequences=True))
model.add(LSTM(64, activation='relu', return_sequences=False))
model.add(RepeatVector(timestemps))
model.add(LSTM(64, activation='relu', return_sequences=True))
model.add(LSTM(128, activation='relu', return_sequences=True))
model.add(TimeDistributed(Dense(n_features)))

'''model.add(LSTM(100, activation='relu', input_shape=(n_in,1)))
model.add(RepeatVector(n_in))
model.add(LSTM(100, activation='relu', return_sequences=True))
model.add(TimeDistributed(Dense(1)))
'''
model.compile(optimizer='adam', loss='mse')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 100)               40800     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 2815, 100)         0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 2815, 100)         80400     
_________________________________________________________________
time_distributed_1 (TimeDist (None, 2815, 1)           101       
Total params: 121,301
Trainable params: 121,301
Non-trainable params: 0
_________________________________________________________________


In [7]:
# fit model
model.fit(sequence_in, sequence_in, epochs=50, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f0d506d2cf8>

In [None]:
# load the model

In [11]:
yhat = model.predict(sequence_in, verbose=0)

In [13]:
print(sequence_in[0,0,0])
print(np.reshape(sequence_in, (sequence_in.size,))[:8])
print(yhat[0,0,0])
print(np.reshape(yhat, (yhat.size,))[:8])

76.699997
[76.699997 77.370003 74.940002 76.870003 76.900002 77.300003 74.389999
 74.830002]
nan
[nan nan nan nan nan nan nan nan]


In [31]:
fig = plt.figure(facecolor='white')
ax = fig.add_subplot(111)
ax.plot(np.reshape(X_train, (X_train.size,)), label='True Data')
plt.plot(np.reshape(yhat, (yhat.size,)), label='Prediction')
plt.legend()
plt.show()