In [42]:
import os 
import pandas as pd
import numpy as np
import h5py
import json
from numpy import nan
import requests
import os.path
from sklearn import preprocessing
from keras.optimizers import RMSprop

# Raise pandas' display limits so up to 500 rows and 500 columns are rendered.
DISPLAY_LIMIT = 500
pd.set_option('display.max_rows', DISPLAY_LIMIT)
pd.set_option('display.max_columns', DISPLAY_LIMIT)

### Load Data

In [43]:
def get_test_data():
    """Load the test dataset from the HDF5 checkpoint file.

    Reads the ``'table'`` key of ``./checkpoints/data_test.h5`` and returns
    the resulting frame's contents as a NumPy array (``DataFrame.values``).
    """
    return pd.read_hdf('./checkpoints/data_test.h5', key='table').values

In [44]:
def normalize_data(float_data):
    """Standardize the data along axis 0 to zero mean and unit standard deviation.

    Arguments:
        float_data {array-like} -- Numeric data whose first axis indexes the
            samples; statistics are computed per column (``axis=0``).

    Returns:
        numpy.ndarray -- A new array of the same shape holding the
            standardized values. Floating-point inputs keep their dtype;
            any other dtype is converted to ``float64``.

    Notes:
        * The input is never modified; a copy is normalized instead, so
          re-running a cell on the same array gives the same result.
        * Columns with zero standard deviation (constant columns) are only
          centered, yielding zeros rather than NaN/inf from a 0/0 division.
    """
    # np.array copies by default, so the caller's array stays untouched.
    values = np.array(float_data)
    if not np.issubdtype(values.dtype, np.floating):
        # In-place float arithmetic on an integer array would raise.
        values = values.astype(np.float64)

    values -= values.mean(axis=0)
    std = values.std(axis=0)
    # Substitute 1 for a zero std so constant columns divide cleanly to 0.
    values /= np.where(std == 0, 1.0, std)
    return values

In [45]:
# Load the raw test matrix and standardize it column-wise in one step.
data = normalize_data(get_test_data())

In [46]:
# Display the normalized array as this cell's output.
data

array([[-3.37814783e-01, -2.25267417e-05, -7.14610900e-06,
        -3.04541430e-05, -3.16574836e-01, -1.18456275e-01],
       [-3.37814783e-01, -2.25267417e-05, -7.14610900e-06,
        -3.04541430e-05, -3.16574836e-01, -1.18456275e-01],
       [-3.37814783e-01, -2.25267417e-05, -7.14610900e-06,
        -3.04541430e-05, -3.16574836e-01, -1.18456275e-01],
       ...,
       [ 1.18226921e+00, -3.73320798e-01,  1.97613972e+00,
        -2.01228691e-01,  1.37781913e+00,  4.41700749e-01],
       [ 1.18226921e+00, -3.73320798e-01,  1.97613972e+00,
        -2.01228691e-01,  1.37781913e+00,  7.21779261e-01],
       [ 8.02248215e-01, -3.73320798e-01,  1.97613972e+00,
        -2.01228691e-01,  1.37781913e+00,  7.21779261e-01]])

### Create Train / Test / Validation Generators

In [47]:
# Adapted from "Deep Learning with Python" by Francois Chollet
def generator(data, lookback, delay, min_index, max_index,
              shuffle=False, batch_size=128, step=6):
    """ Generator yielding timeseries samples and their targets

    Arguments:
        data {numpy.ndarray} -- The original 2-D array of floating-point data (timesteps x features).
        lookback {int} -- How many timesteps back the input data should go.
        delay {int} -- How many timesteps in the future the target should be.
        min_index {int} -- First index in the data array that may be drawn from.
        max_index {int or None} -- Exclusive upper bound on the rows to draw from.
            ``None`` means ``len(data) - delay - 1``. Larger values are clamped to
            ``len(data) - delay`` so every row still has a target ``delay`` steps ahead.

    Keyword Arguments:
        shuffle {bool} -- Whether to shuffle the samples or draw them in chronological order. (default: {False})
        batch_size {int} -- The number of samples per batch. (default: {128})
        step {int} -- The period, in timesteps, at which you sample data. (default: {6})

    Yields:
        tuple -- ``(samples, targets)`` where ``samples`` has shape
            ``(batch, timesteps, features)`` and ``targets`` has shape
            ``(batch, features)``. ``timesteps`` is the number of indices in
            ``range(row - lookback, row, step)``. Each target is the full
            feature row found ``delay`` steps after the sample's last row.

    Raises:
        ValueError -- On first iteration, if no row lies between
            ``min_index + lookback`` and the effective ``max_index``.
    """
    # Rows at or beyond this index have no target ``delay`` steps ahead.
    first_row_without_target = len(data) - delay
    if max_index is None:
        max_index = first_row_without_target - 1
    else:
        max_index = min(max_index, first_row_without_target)

    first_row = min_index + lookback
    if first_row >= max_index:
        # Without this check the chronological branch would yield empty
        # batches forever and the shuffled branch would fail inside randint.
        raise ValueError(
            "No usable rows: min_index + lookback (%d) must be smaller than "
            "the effective max_index (%d)." % (first_row, max_index))

    # Offsets of the sampled timesteps relative to each target row; computed
    # once because they are identical for every row.
    window_offsets = np.arange(-lookback, 0, step)

    i = first_row
    while True:
        if shuffle:
            rows = np.random.randint(first_row, max_index, size=batch_size)
        else:
            # Restart from the beginning once a full batch no longer fits.
            if i + batch_size >= max_index:
                i = first_row
            rows = np.arange(i, min(i + batch_size, max_index))
            i += len(rows)

        # Broadcasting rows (batch, 1) against offsets (timesteps,) gives a
        # (batch, timesteps) index grid, i.e. one lookback window per row.
        samples = np.asarray(data[rows[:, None] + window_offsets],
                             dtype=np.float64)
        # Use the whole feature row as the target. Indexing a single column
        # here would broadcast one scalar across every target column.
        targets = np.asarray(data[rows + delay], dtype=np.float64)
        yield samples, targets

In [48]:
def get_train_test_val(data, lookback, step, delay=1440, batch_size=128):
    """Build chronological train / test / validation generators over ``data``.

    The rows are split in order: the first half feeds the training
    generator, the following quarter the test generator, and the remainder
    the validation generator.

    Arguments:
        data {numpy.ndarray} -- Array of timesteps x features to draw windows from.
        lookback {int} -- How many timesteps back each input window goes.
        step {int} -- The period, in timesteps, at which windows are sampled.

    Keyword Arguments:
        delay {int} -- How many timesteps ahead the target lies. (default: {1440})
        batch_size {int} -- Samples per generated batch. (default: {128})

    Returns:
        tuple -- ``(train_gen, test_gen, val_gen, val_steps, test_steps)``;
            the step counts are the number of full batches available in the
            validation and test segments (never negative).
    """
    n_rows = len(data)
    train_end = int(n_rows * .5)
    test_start = train_end + 1
    test_end = train_end + int(n_rows * .25)
    val_start = test_end + 1
    # The last segment must stop early enough that each row still has a
    # target ``delay`` steps ahead; this is the bound ``generator`` applies
    # when ``max_index`` is None.
    val_end = n_rows - delay - 1

    def make_generator(min_index, max_index):
        return generator(data,
                         lookback=lookback,
                         delay=delay,
                         min_index=min_index,
                         max_index=max_index,
                         batch_size=batch_size,
                         step=step)

    train_gen = make_generator(0, train_end)
    test_gen = make_generator(test_start, test_end)
    # max_index=None lets the generator stop ``delay`` rows before the end
    # instead of indexing past the array.
    val_gen = make_generator(val_start, None)

    val_steps = max(0, (val_end - val_start - lookback) // batch_size)
    test_steps = max(0, (test_end - test_start - lookback) // batch_size)

    return train_gen, test_gen, val_gen, val_steps, test_steps

In [29]:
def fake_generator(data):
    """Placeholder generator: yields the fixed pair ``(68, 70)`` once.

    The ``data`` argument is accepted but not used.
    """
    stub_batch = (68, 70)
    yield stub_batch

In [30]:
from keras.models import Sequential
from keras.layers import Dense, Flatten

In [49]:
def get_model(float_data, lookback, step):
    """Build a small fully-connected baseline model for the windowed data.

    Arguments:
        float_data {numpy.ndarray} -- Data array; only its last dimension
            (the number of features) is used.
        lookback {int} -- How many timesteps back each input window goes.
        step {int} -- The period, in timesteps, at which windows are sampled.

    Returns:
        Sequential -- Uncompiled model: Flatten -> Dense(32, relu) ->
            Dense(n_features).
    """
    # Number of timesteps in a window of ``lookback`` rows sampled every
    # ``step`` rows. Deriving it from the arguments keeps the input shape
    # correct for any window size, not just 10080.
    timesteps = len(range(0, lookback, step))
    n_features = float_data.shape[-1]

    model = Sequential()
    model.add(Flatten(input_shape=(timesteps, n_features)))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(n_features))
    return model

In [50]:
def train_model(lookback=10080, step=1, steps_per_epoch=500, epochs=40):
    """Load the data, build the generators and model, and run training.

    Keyword Arguments:
        lookback {int} -- How many timesteps back each input window goes. (default: {10080})
        step {int} -- The period, in timesteps, at which windows are sampled. (default: {1})
        steps_per_epoch {int} -- Training batches drawn per epoch. (default: {500})
        epochs {int} -- Number of training epochs. (default: {40})

    Returns:
        The history object returned by ``model.fit_generator``.
    """
    # NOTE(review): the normalization statistics are computed over the whole
    # array before it is split, so validation/test rows influence the scaling
    # of the training data. Consider fitting them on the training rows only.
    normalized_data = normalize_data(get_test_data())
    train_gen, _test_gen, val_gen, val_steps, _test_steps = get_train_test_val(
        normalized_data, lookback, step)

    model = get_model(normalized_data, lookback, step)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None" line.
    model.summary()
    # NOTE(review): 'acc' is not a meaningful metric for MAE regression; it is
    # kept so the keys of the returned history stay unchanged.
    model.compile(optimizer=RMSprop(), loss='mae', metrics=['acc'])
    history = model.fit_generator(train_gen,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=epochs,
                                  validation_data=val_gen,
                                  validation_steps=val_steps)
    return history

In [None]:
# Run the training pipeline with its defaults; the returned history is the cell's output.
train_model()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_8 (Flatten)          (None, 60480)             0         
_________________________________________________________________
dense_15 (Dense)             (None, 32)                1935392   
_________________________________________________________________
dense_16 (Dense)             (None, 6)                 198       
Total params: 1,935,590
Trainable params: 1,935,590
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/40