In [2]:
import tensorflow as tf
import numpy as np
import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

## Build a One-Hidden-Layer Recurrent Neural Network

### Setup

In [3]:
# Node identifiers handed to the data loader (note that 10 and 11 are not in the list).
node = list(range(1, 10)) + [12, 13]

# Calendar span of the experiment. `end` is exclusive: the last generated day is 2018-09-26.
start = datetime.datetime.strptime("2018-08-27", "%Y-%m-%d")
end = datetime.datetime.strptime("2018-9-27", "%Y-%m-%d")
date = [start + datetime.timedelta(days=offset) for offset in range((end - start).days)]

## Sample Data

### Data Example (Mean, Median, Max, Min) 1 hr -> 0.5 hr

### Helper Function

In [4]:
def combine(data, slots=6):
    """Run ``calculate`` on every sample of ``data`` and stack the results.

    Args:
        data: Iterable of per-sample arrays; each one is passed to ``calculate``.
        slots: Forwarded unchanged to ``calculate`` as its second argument.

    Returns:
        ``np.ndarray`` holding the per-sample results in input order.
    """
    return np.array([calculate(sample, slots) for sample in data])

def calculate(arr, slots):
    """Down-sample ``arr`` by aggregating each run of ``slots`` consecutive rows.

    The first 44 columns are handled as four groups of 11, each reduced with
    its own statistic over the rows of the window:

    * columns 0-10  -> mean
    * columns 11-21 -> median
    * columns 22-32 -> max
    * columns 33-43 -> min

    Args:
        arr: 2-D array with at least 44 columns (any further columns are ignored).
        slots: Number of rows per aggregation window. When the row count is
            not a multiple of ``slots`` the final window is shorter.

    Returns:
        ``np.ndarray`` with one 44-value row per window.
    """
    n_rows = arr.shape[0]
    aggregated = []
    for begin in range(0, n_rows, slots):
        # The window width must follow `slots`. It used to be hard-coded to 6,
        # which produced overlapping windows for slots < 6 and skipped rows
        # for slots > 6.
        window = arr[begin:begin + slots]
        aggregated.append(np.hstack((
            np.mean(window[:, 0:11], axis=0),
            np.median(window[:, 11:22], axis=0),
            np.max(window[:, 22:33], axis=0),
            np.min(window[:, 33:44], axis=0),
        )))

    return np.array(aggregated)

### Data Loader (Cross Validation)

In [5]:
def concatenate(date_list, node):
    """Load the per-day ``.npy`` arrays for the weekdays in ``date_list``.

    For every entry whose ``weekday()`` is below 5 the file
    ``./data/m4/<first 10 characters of str(day)>_4.npy`` is loaded.
    Arrays with fewer than 286 rows are dropped.

    Args:
        date_list: Iterable of date-like objects providing ``weekday()``.
        node: Accepted for interface compatibility; not used by this function.

    Returns:
        ``np.ndarray`` built from the retained daily arrays, in input order.
    """
    kept = []
    for day in date_list:
        is_weekend = day.weekday() >= 5
        if not is_weekend:
            daily = np.load("./data/m4/" + str(day)[:10] + '_4.npy')
            if daily.shape[0] >= 286:
                kept.append(daily)
    return np.array(kept)

class Data:
    """Sliding-window data set that serves shuffled train/validation mini-batches.

    Construction loads the daily arrays, cuts them into windows of 42 rows,
    aggregates every window with ``combine`` and reserves 80 % of the samples
    for training. ``shuffle_index`` draws a new random split; ``get_train`` /
    ``get_val`` then hand out consecutive mini-batches from it.
    """

    def __init__(self, time, batch_size = 32):
        """Load the data and prepare the bookkeeping for batching.

        Args:
            time: Stride between the starts of successive windows, in the same
                unit as the 5-minute sampling interval passed to ``data_init``.
            batch_size: Number of samples returned per ``get_train`` / ``get_val`` call.
        """
        # Reads the module-level `date` and `node` defined in the setup cell.
        self.odata = concatenate(date_list=date, node=node)
        self.data = None
        # Honour the caller's stride: it used to be hard-coded to 10, which
        # silently ignored the `time` argument.
        self.data_init(miniute=5, time=time)
        self.data = combine(self.data)

        self.length = len(self.data)

        self.train_len = int(self.length * 0.8)
        self.val_len = int(self.length - self.train_len)

        self.index = None
        self.train_index = None
        self.train_pos = 0
        self.val_index = None
        self.val_pos = 0
        self.batch_size = batch_size

    def data_init(self, miniute, time):
        """Cut ``self.odata`` into 42-row windows and store them in ``self.data``.

        Window starts run from row ``int(9 * 60 / miniute)`` (presumably the
        09:00 sample - confirm against the data files) up to, but excluding,
        row ``286 - 42``, advancing ``int(time / miniute)`` rows at a time.

        Args:
            miniute: Divisor converting minutes into row offsets (the spelling
                is kept so existing keyword callers keep working).
            time: Stride between window starts, in minutes.
        """
        first_row = int(9 * 60 / miniute)
        stride = int(time / miniute)
        windows = [
            self.odata[:, row:row + 42, :]
            for row in range(first_row, 286 - 42, stride)
        ]
        self.data = np.array(windows).reshape(-1, 42, 44)

    def reset_index(self):
        """Reset ``self.index`` to the identity ordering ``0 .. length - 1``."""
        self.index = np.arange(self.length)

    def shuffle_index(self):
        """Shuffle all sample indices, re-split them and rewind both batch cursors.

        NOTE(review): the split is redrawn on every call, so when this is
        called once per epoch samples migrate between the training and the
        validation part; validation loss after the first epoch is then not
        measured on unseen data. Confirm that this is intended.
        """
        if self.index is None:
            # Allow shuffling without a prior explicit reset_index() call;
            # np.random.shuffle(None) would raise otherwise.
            self.reset_index()
        self.train_pos = 0
        self.val_pos = 0
        np.random.shuffle(self.index)
        self.train_index = self.index[:self.train_len]
        self.val_index = self.index[self.train_len:]

    def _next_batch(self, index, pos):
        """Gather ``batch_size`` samples starting at ``pos`` and split them into (input, label)."""
        rows = self.data[index[pos:pos + self.batch_size]]
        # Inputs are all rows but the last; labels are the first 11 columns
        # of the rows shifted by one (i.e. the next row of each input row).
        return rows[:, 0:-1], rows[:, 1:, 0:11]

    def get_train(self):
        """Return the next training mini-batch as ``(train, label)`` and advance the cursor.

        ``train`` holds every row but the last of each sample with all
        columns; ``label`` holds rows ``1:`` restricted to columns ``0:11``.
        Both are slices of one freshly gathered batch array.
        """
        train, label = self._next_batch(self.train_index, self.train_pos)
        self.train_pos += self.batch_size

        return train, label

    def get_val(self):
        """Return the next validation mini-batch as ``(train, label)`` and advance the cursor.

        The layout of the two arrays is the same as for ``get_train``.
        """
        train, label = self._next_batch(self.val_index, self.val_pos)
        self.val_pos += self.batch_size

        return train, label

In [6]:
# Build the data set used by the training cell (10-minute stride between windows).
a = Data(time=10)

### RNN with 6 time steps: X_t has shape (batch_size, 44); the output h_t, shape (batch_size, 11), is the prediction at step t

In [10]:
# --- Hyper-parameters -------------------------------------------------------
num_input = 44            # features per time step fed to the network
num_time_step = 6         # time steps per sample
# num_neuron_per_layer = 256
num_neuron_per_layer = 512
num_layers = 1
num_output = 11           # values predicted per time step
learning_rate = 0.001
nepoches = 150

# --- Graph definition (TensorFlow 1.x API) ----------------------------------
tf.reset_default_graph()
X = tf.placeholder(dtype=tf.float32, shape=[None, num_time_step, num_input])
y = tf.placeholder(dtype=tf.float32, shape=[None, num_time_step, num_output])

# LSTM cell wrapped with attention over the last 6 states, then projected
# down to `num_output` values per step.
cell = tf.contrib.rnn.BasicLSTMCell(num_units=num_neuron_per_layer, activation=tf.nn.relu)
cell = tf.contrib.rnn.AttentionCellWrapper(cell, attn_length=6)
cell = tf.contrib.rnn.OutputProjectionWrapper(cell, num_output)

outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# Mean squared error over every time step and output.
loss = tf.reduce_mean(tf.square(y - outputs))

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
a.reset_index()

# --- Training ---------------------------------------------------------------
# NOTE(review): the file names below say "128" although the cell uses 512
# units - confirm which one is meant.
with tf.Session() as sess:
    sess.run(init)
    # The context manager closes the log on every exit path. Previously the
    # file was closed only when the early-stop branch fired, so it leaked
    # when training ran to completion or raised.
    with open("log_10min_att_128.txt", "w") as f:
        for epoch in range(nepoches):
            iteration = a.train_len // a.batch_size
            a.shuffle_index()
            for i in range(iteration):
                x_batch, y_batch = a.get_train()
                sess.run(train, feed_dict={X:x_batch, y:y_batch})

            # Every 10th epoch: evaluate on the first validation mini-batch.
            if epoch % 10 == 0:
                x_batch, y_batch = a.get_val()
                mse = loss.eval(feed_dict={X:x_batch, y:y_batch})
                print(epoch, "\tMSE:", mse)
                f.write(str(epoch) + "\tMSE:" + str(mse) + "\n")

                # Early stop: checkpoint and quit once the validation MSE drops below 5.
                if mse < 5:
                    saver.save(sess, "./RNN_MODEL_10_att_128" + str(epoch))
                    break

# NOTE(review): if the MSE threshold is never reached, no checkpoint is written.
# The original final save is left disabled below - confirm whether it should run.
#     saver.save(sess, "./RNN_MODEL_10_atten_final" )

0 	MSE: 46.19135
10 	MSE: 27.552986
20 	MSE: 21.164125
30 	MSE: 13.186788
40 	MSE: 16.108063
50 	MSE: 9.81134
60 	MSE: 11.000569
70 	MSE: 7.908967
80 	MSE: 9.9406185
90 	MSE: 5.7461085
100 	MSE: 6.6991568
110 	MSE: 7.610714
120 	MSE: 5.8232465
130 	MSE: 4.3707285
