In [1]:
import tensorflow as tf
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg.eigen.arpack import eigsh
import scipy.sparse as sp
import pandas as pd
import pickle
from datetime import timedelta
import matplotlib.pyplot as plt
from scipy.stats.stats import pearsonr
#from tensorflow.contrib.rnn.python.ops import rnn_cell as RNNCell
from tensorflow.python.ops.rnn_cell_impl import _RNNCell as RNNCell
import collections
from tensorflow.contrib import rnn
import h5py
#from tensorflow.python.ops.rnn_cell_impl import _RNNCell 
import random

In [2]:
def normalize_adj(adj):
    """Symmetrically normalize adjacency matrix."""
    #adj[np.isnan(adj)] = 0.
    adj = tf.abs(adj)
    rowsum = tf.reduce_sum(adj, 1)# sum by row

    d_inv_sqrt = tf.pow(rowsum, -0.5)
   
    #d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    
    d_mat_inv_sqrt = tf.diag(d_inv_sqrt)

    return tf.matmul(tf.matmul(d_mat_inv_sqrt, adj), d_mat_inv_sqrt)

def masked_mae_tf(preds, labels, null_val=np.nan):
    """
    Accuracy with masking.
    :param preds:
    :param labels:
    :param null_val:
    :return:
    """
    #print (preds.shape)
    #print (labels.shape)
    if np.isnan(null_val):
        mask = ~tf.is_nan(labels)
    else:
        mask = tf.not_equal(labels, null_val)
    mask = tf.cast(mask, tf.float32)
    mask /= tf.reduce_mean(mask)
    mask = tf.where(tf.is_nan(mask), tf.zeros_like(mask), mask)
    loss = tf.abs(tf.subtract(preds, labels))
    loss = loss * mask
    loss = tf.where(tf.is_nan(loss), tf.zeros_like(loss), loss)
    return tf.reduce_mean(loss)

def masked_mae_tf_by_horizon(preds, labels, null_val=np.nan):
    """
    Accuracy with masking.
    :param preds:
    :param labels:
    :param null_val:
    :return:
    """
    preds_reshape = tf.reshape(preds, [-1, sn, horizon])
    labels_reshape = tf.reshape(labels, [-1, sn, horizon])
    
    res = []
    
    for i in range(horizon):
        labels = labels_reshape[:, :, 0:(i+1)]
        preds = preds_reshape[:, :, 0:(i+1)]
        
        if np.isnan(null_val):
            mask = ~tf.is_nan(labels)
        else:
            mask = tf.not_equal(labels, null_val)
        mask = tf.cast(mask, tf.float32)
        mask /= tf.reduce_mean(mask)
        mask = tf.where(tf.is_nan(mask), tf.zeros_like(mask), mask)
        loss = tf.abs(tf.subtract(preds, labels))
        loss = loss * mask
        loss = tf.where(tf.is_nan(loss), tf.zeros_like(loss), loss)
        
        res.append(tf.reduce_mean(loss))
        
    return res

In [3]:
class StandardScaler:
    """
    Standard the input
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def transform(self, data):
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        return (data * self.std) + self.mean


In [4]:
# install tables
raw_data = pd.read_hdf('../../data/METR-LA/metr-la.h5')

In [5]:
raw_data.shape

(34272, 207)

In [6]:
raw_data.head()

Unnamed: 0,773869,767541,767542,717447,717446,717445,773062,767620,737529,717816,...,772167,769372,774204,769806,717590,717592,717595,772168,718141,769373
2012-03-01 00:00:00,64.375,67.625,67.125,61.5,66.875,68.75,65.125,67.125,59.625,62.75,...,45.625,65.5,64.5,66.428571,66.875,59.375,69.0,59.25,69.0,61.875
2012-03-01 00:05:00,62.666667,68.555556,65.444444,62.444444,64.444444,68.111111,65.0,65.0,57.444444,63.333333,...,50.666667,69.875,66.666667,58.555556,62.0,61.111111,64.444444,55.888889,68.444444,62.875
2012-03-01 00:10:00,64.0,63.75,60.0,59.0,66.5,66.25,64.5,64.25,63.875,65.375,...,44.125,69.0,56.5,59.25,68.125,62.5,65.625,61.375,69.857143,62.0
2012-03-01 00:15:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2012-03-01 00:20:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# revised based on https://github.com/transpaper/gconvRNN/blob/master/model.py

def conv(x, ddgf, feat_out, K, W):
    '''
    x : [batch_size, N_node, feat_in] - input of each time step
    nSample : number of samples = batch_size, let's say it is 100
    nNode : number of node in graph
    feat_in : number of input feature, usually is set as 1
    feat_out : number of output feature
    ddgf : data driven graph filter
    K : size of kernel(number of cheby coefficients), is 1 in the fast graph paper
    W : cheby_conv weight [K * feat_in, feat_out]
    '''
    if len(x.shape) == 2:
        x = tf.expand_dims(x, 2) # extend a dimension "feature_in"
    nSample, nNode, feat_in = x.get_shape()
    #feat_in = 1
    print (nSample, nNode, feat_in)
    nSample, nNode, feat_in = int(nSample), int(nNode), int(feat_in)
    
    x0 = tf.transpose(x, perm=[1, 2, 0]) #change it to [nNode, feat_in, nSample]
    x0 = tf.reshape(x0, [nNode, feat_in*nSample]) # feature_in = 1, [nNode, nSample]
    #x = tf.expand_dims(x0, 0) # make it [1, nNode, feat_in*nSample]
    
    x0 = tf.matmul(ddgf, x0) # graph convolutional #[nNode, nSample]
        
    #x = tf.reshape(x, [K, nNode, feat_in, nSample])
    #x = tf.transpose(x, perm=[3,1,2,0])
    x0 = tf.reshape(x0, [nSample*nNode, feat_in*K]) #[nSample*nNode, 1]
    
    x = tf.matmul(x0, W) #No Bias term?? -> Yes
    out = tf.reshape(x, [nSample, nNode, feat_out]) 
    return out


In [160]:
'''
# Create model
def gcn(x, weights, biases, batch_size, n_input, frequency,flag, n_output_vec):
    # Hidden layer with RELU activation
    
    output_list = tf.Variable(tf.zeros([n_output_vec,1]),dtype=tf.float32) #'Tensor' object does not support item assignment, cant build Ypre

    
    for i in range(num):
        Xtem = tf.reshape(x[i,:], [sn, 1]) # 207 by 1
        Xtem = tf.transpose(Xtem) # 1 by 207
        Xtem = tf.cast(Xtem, dtype=np.float32)
        #Xtem = tf.reshape(x[i,:], [n_input, frequency])
        #Xtem = tf.transpose(Xtem)
        #Atem = tf.convert_to_tensor(A_whole_final, dtype=np.float32)
        #Atem1 = tf.diag(tf.ones([n_input])) # Atem not palying any roles
        #Ytem = tf.reshape(Y[i,:], [n_input, 1])
        #Atem = tf.diag(tf.ones([n_input]))
        Atem1 = 0.5*(weights['A1'] + tf.transpose(weights['A1']))#+ Atem 
        Atem1 = normalize_adj(Atem1)
        #th = tf.constant(0.01, dtype=tf.float32)
        #where = tf.subtract(Atem1, th)
        #Atem1 = tf.nn.relu(where)
        
        Z1 = tf.matmul(Xtem, Atem1) #+ tf.matmul( tf.matmul(weights['A1'], weights['A1']), Xtem)
        
        #layer_1 = tf.matmul(Z1, weights['h1']) 
        layer_1 = tf.add(tf.matmul(Z1, weights['h1']), biases['b1'])
        layer_1 = tf.nn.relu(layer_1)
        
        '''
        Atem2 = 0.5*(weights['A2'] + tf.transpose(weights['A2']))#+ Atem 
        Atem2 = normalize_adj(Atem2)
        
        Z2 = tf.matmul(Atem2, layer_1)
        layer_2 = tf.add(tf.matmul(Z2, weights['h2']), biases['b2'])
        layer_2 = tf.nn.relu(layer_2)
        
        Atem3 = 0.5*(weights['A3'] + tf.transpose(weights['A3']))#+ Atem 
        Atem3 = normalize_adj(Atem3)
        Z3 = tf.matmul(Atem3, layer_2)
        layer_3 = tf.add(tf.matmul(Z3, weights['h3']), biases['b3'])
        layer_3 = tf.nn.relu(layer_3)
        '''
        #Atem4 = 0.5*(weights['A4'] + tf.transpose(weights['A4']))#+ Atem 
        #Atem4 = normalize_adj(Atem4)
        #Z4 = tf.matmul(Atem4, layer_3)
        #layer_4 = tf.add(tf.matmul(Z4, weights['h4']), biases['b4'])
        #layer_4 = tf.nn.relu(layer_4)
        
        # flattern
        #layer_3 = tf.reshape(layer_3, [1, 272*n_hidden_vec3])
        
        #F1 = tf.add(tf.matmul(layer_3, weights['f1']), biases['bf1'])
        #F1 = tf.nn.relu(F1)
        
        #F2 = tf.add(tf.matmul(F1, weights['f2']), biases['bf2'])
        #F2 = tf.nn.relu(F2)
        
        #F3 = tf.add(tf.matmul(F1, weights['f3']), biases['bf3'])
        #out_layer = tf.reshape(F3, [272, 1])

        # Output layer with linear activation
        Z4 = layer_1#tf.matmul(Atem1, layer_1)#tf.matmul(Atem, layer_3)
        #out_layer = tf.add(tf.matmul(Z4, weights['out']), biases['bout'])
        #out_layer = tf.nn.relu(out_layer)
        # weather layer 1
        #x_wea_tem = tf.reshape(x_wea[i,:], [1, 9*frequency2]) # 1 by 126
        #layer_1_wea = tf.add(tf.matmul(x_wea_tem, weights['h1_wea']), biases['b1_wea'])
        #layer_1_wea = tf.nn.relu(layer_1_wea)
        
        #out_layer_wea = tf.add(tf.matmul(layer_1_wea, weights['out_wea']), biases['bout_wea'])
        #out_layer = tf.add(out_layer, tf.reshape(out_layer_wea, [272, 1]))
        
        #print (out_layer.get_shape())
        if i ==0:
            #print (out_layer.shape)
            #tem = tf.reshape(Z4, [1, -1])
            output_list = Z4
        else:
            #tem = tf.reshape(Z4, [1, -1])
            output_list = tf.concat([output_list, Z4], 0)
        
        #print (tf.reduce_mean(tf.pow(output_list-out_layer, 2)))
    
    #print (output_list.get_shape())
    #print ('here!!!!!!!!!!!!!!!!')
    #output_list = tf.transpose(output_list)
    
    #print (output_list.shape)
    
    #print (output_list.get_shape())
    
    return output_list
'''

IndentationError: unexpected indent (<ipython-input-160-cf2d07f7cf52>, line 32)

In [9]:
# Create model
def gcn(x, weights, biases, batch_size, n_input, frequency,flag, n_output_vec):
    # Hidden layer with RELU activation
    
   # output_list = tf.Variable(tf.zeros([n_output_vec,1]),dtype=tf.float32) #'Tensor' object does not support item assignment, cant build Ypre

    #Xtem = tf.reshape(x[i,:], [n_input, frequency])
    #Xtem = tf.transpose(Xtem)
    #Atem = tf.convert_to_tensor(A_whole_final, dtype=np.float32)
    #Atem1 = tf.diag(tf.ones([n_input])) # Atem not palying any roles
    #Ytem = tf.reshape(Y[i,:], [n_input, 1])
    #Atem = tf.diag(tf.ones([n_input]))
    #x = tf.reshape(x, [-1, sn, 1]) # 100, 207, 1
    
    # x (?, 207, 12)
    x = tf.transpose(x, [1, 0, 2]) # 207, ?, 12
    x = tf.reshape(x, [sn, -1]) # 207, batch*feature_num
    Atem1 = 0.5*(weights['A1'] + tf.transpose(weights['A1']))#+ Atem 
    Atem1 = normalize_adj(Atem1)
    #th = tf.constant(0.01, dtype=tf.float32)
    #where = tf.subtract(Atem1, th)
    #Atem1 = tf.nn.relu(where)

    Z1 = tf.matmul(Atem1, x) # 207, batch*feature_num  #+ tf.matmul( tf.matmul(weights['A1'], weights['A1']), Xtem)
    Z1 = tf.reshape(Z1, [-1, frequency]) # 207* 100, frequency
    #layer_1 = tf.matmul(Z1, weights['h1']) 
    layer_1 = tf.add(tf.matmul(Z1, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1) # 207*100, hidden1

    
    #Atem2 = 0.5*(weights['A2'] + tf.transpose(weights['A2']))#+ Atem 
    #Atem2 = normalize_adj(Atem2)
    
    #layer_1 = tf.reshape(layer_1, [sn, -1])  # 207, batchsize*hidden1
    #Z2 = tf.matmul(Atem2, layer_1)
    #Z2 = tf.reshape(Z2, [-1, n_hidden_vec1]) # 207*batchsize, n_hidden_vec1
    #layer_2 = tf.add(tf.matmul(Z2, weights['h2']), biases['b2'])
    #layer_2 = tf.nn.relu(layer_2) # 207*batchsize, hidden2

    #Atem3 = 0.5*(weights['A3'] + tf.transpose(weights['A3']))#+ Atem 
    #Atem3 = normalize_adj(Atem3) 
    
    #layer_2 = tf.reshape(layer_2, [sn, -1])  # 207, batchsize*hidden2
    #Z3 = tf.matmul(Atem3, layer_2)
    #Z3 = tf.reshape(Z3, [-1, n_hidden_vec2]) # 207*batchsize, hidden2
    #layer_3 = tf.add(tf.matmul(Z3, weights['h3']), biases['b3'])
    #layer_3 = tf.nn.relu(layer_3) # 207*batchsize, hidden3
    
    #Atem4 = 0.5*(weights['A4'] + tf.transpose(weights['A4']))#+ Atem 
    #Atem4 = normalize_adj(Atem4)
    #Z4 = tf.matmul(Atem4, layer_3)
    #layer_4 = tf.add(tf.matmul(Z4, weights['h4']), biases['b4'])
    #layer_4 = tf.nn.relu(layer_4)

    # flattern
    #layer_3 = tf.reshape(layer_3, [1, 272*n_hidden_vec3])

    #F1 = tf.add(tf.matmul(layer_3, weights['f1']), biases['bf1'])
    #F1 = tf.nn.relu(F1)

    #F2 = tf.add(tf.matmul(F1, weights['f2']), biases['bf2'])
    #F2 = tf.nn.relu(F2)

    #F3 = tf.add(tf.matmul(F1, weights['f3']), biases['bf3'])
    #out_layer = tf.reshape(F3, [272, 1])

    # Output layer with linear activation
    layer_1 = tf.reshape(layer_1, [sn, -1, n_hidden_vec1])
    layer_1 = tf.transpose(layer_1, [1, 0, 2]) # batchsize, sn, hidden3
    layer_1 = tf.reshape(layer_1, [-1, sn*n_hidden_vec1]) # batchsize, sn*hidden3
    Z4 = layer_1
    #out_layer = tf.add(tf.matmul(Z4, weights['outg']), biases['boutg'])
    #tf.matmul(Atem1, layer_1)#tf.matmul(Atem, layer_3)
    #out_layer = tf.add(tf.matmul(Z4, weights['out']), biases['bout'])
    #out_layer = tf.nn.relu(out_layer)
    # weather layer 1
    #x_wea_tem = tf.reshape(x_wea[i,:], [1, 9*frequency2]) # 1 by 126
    #layer_1_wea = tf.add(tf.matmul(x_wea_tem, weights['h1_wea']), biases['b1_wea'])
    #layer_1_wea = tf.nn.relu(layer_1_wea)

    #out_layer_wea = tf.add(tf.matmul(layer_1_wea, weights['out_wea']), biases['bout_wea'])
    #out_layer = tf.add(out_layer, tf.reshape(out_layer_wea, [272, 1]))


    
    return Z4

In [13]:
def gcn_corr_final(frequency, horizon, learning_rate, decay,batch_size, n_hidden_vec1,n_hidden_vec2,n_hidden_vec3,keep, early_stop_th,training_epochs, reg1, reg2):
    # set size
    #sn = 3 # station number
    X_whole = []
    Y_whole = []

    x_offsets = np.sort(
        # np.concatenate(([-week_size + 1, -day_size + 1], np.arange(-11, 1, 1)))
        np.concatenate((np.arange(-frequency+1, 1, 1),))
    )
    # Predict the next one hour
    y_offsets = np.sort(np.arange(1, 1+ horizon, 1))

    min_t = abs(min(x_offsets))
    max_t = abs(raw_data.shape[0] - abs(max(y_offsets)))  # Exclusive
    for t in range(min_t, max_t):
        x_t = raw_data.iloc[t + x_offsets, 0:sn].values.flatten('F')
        y_t = raw_data.iloc[t + y_offsets, 0:sn].values.flatten('F')
        X_whole.append(x_t)
        Y_whole.append(y_t)

    X_whole = np.stack(X_whole, axis=0)
    time_step = int(X_whole.shape[1] / sn)
    X_whole = np.reshape(X_whole, [X_whole.shape[0], sn, time_step])
    Y_whole = np.stack(Y_whole, axis=0)
    
    i = lstm_steps
    X_whole_lstm = []
    Y_whole_lstm = []
    
    while i <= X_whole.shape[0]:
        X_whole_lstm.append(X_whole[i-lstm_steps:i,:])
        Y_whole_lstm.append(Y_whole[i-1])
        i = i + 1
    
    X_whole_lstm = np.stack(X_whole_lstm, axis = 0) # (34239, 10, 207, 12)
    Y_whole_lstm = np.stack(Y_whole_lstm, axis = 0) # (34239, 2484)
    #print (Y_whole_lstm.shape)
    '''
    time_step = int(time_step) #

    #i = time_step
    #X_whole = np.zeros(shape = (raw_data.shape[0] - time_step, sn*time_step), dtype = np.float)
    #Y_whole = np.zeros(shape = (raw_data.shape[0] - time_step, sn), dtype = np.float)

    while i < raw_data.shape[0]:
        X_whole[i - time_step, ] = raw_data.iloc[(i - time_step):i, 0:sn].values.flatten('F') # 'F' flatten by column, default:flatten by row 0, 1, 2...7
        Y_whole[i - time_step, ] = raw_data.iloc[i, 0:sn]
        i = i + 1
    '''


    n_input = sn # station number
    n_input_vec = n_input * frequency # 207 * frequency
    n_A_vec = n_input * n_input
    n_output_vec = Y_whole_lstm.shape[1] # each row represent a result
    #print (n_output_vec)

    
    num_samples = X_whole_lstm.shape[0]
    num_test = round(num_samples * 0.2)
    num_train = round(num_samples * 0.7)
    num_val = num_samples - num_test - num_train
    #skip = skip1 + freq_max - time_step#time_step_max - time_step # to make sure the testing datasets are the same although the frequency could be different

    X_training = X_whole_lstm[:num_train, :]
    Y_training = Y_whole_lstm[:num_train, :]
    
    # shuffle
    perm = np.arange(X_training.shape[0])
    np.random.shuffle(perm)
    X_training = X_training[perm]
    Y_training = Y_training[perm]
    
    #print (type(X_training))
    #X_training = random.Random(6).shuffle(X_training)
    #Y_training = random.Random(6).shuffle(Y_training)

    X_val = X_whole_lstm[num_train:num_train+num_val, :]
    Y_val = Y_whole_lstm[num_train:num_train+num_val, :]
    #A_val = A_whole[0+training:0+training+validation, :]

    X_test = X_whole_lstm[-num_test:, :]
    Y_test = Y_whole_lstm[-num_test:, :]

    scaler = StandardScaler(mean=X_training.mean(), std=X_training.std())

    X_training = scaler.transform(X_training)
    Y_training = scaler.transform(Y_training)

    X_val = scaler.transform(X_val)
    Y_val = scaler.transform(Y_val)

    X_test = scaler.transform(X_test)
    Y_test = scaler.transform(Y_test)
    
    early_stop_th = int(early_stop_th)
    training_epochs = int(training_epochs)
    
    early_stop_k=0
    display_step = 1
    best_val = 10000
    traing_error = 0
    test_error = 0
    test_error_by_h = 0 # test error by horizon
    predic_res = []
    Y_true = []

    tf.reset_default_graph()

    keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
    
    
    batch_size = int(batch_size)
    n_hidden_vec1 = int(n_hidden_vec1)
    n_hidden_vec2 = int(n_hidden_vec2)
    n_hidden_vec3 = int(n_hidden_vec3)
    early_stop_th = int(early_stop_th)
    training_epochs = int(training_epochs)
    
    early_stop_k=0
    display_step = 1
    best_val = 10000
    traing_error = 0
    test_error = 0
    # Network Parameters

    #n_classes = 2 # MNIST total classes (0-9 digits) # n_classes is for classification only

    tf.reset_default_graph()

    # tf Graph input
    X = tf.placeholder(tf.float32, [None, lstm_steps, sn, time_step]) # X is the input signal
    #X_weather = tf.placeholder(tf.float32, [None, 9 * frequency2]) # X_weather weather and holiday information (9 is the feature number)
    A = tf.placeholder(tf.float32, [None, n_A_vec]) # A is the normalized adj matrix
    oldA = tf.placeholder(tf.float32, [n_input, n_input])
    Y = tf.placeholder(tf.float32, [None, n_output_vec]) # y is the regression output
    #num = tf.placeholder(tf.int32,[1, 1] )

    #Xtem = tf.placeholder(tf.float32, [n_input, frequency]) # for each row of X, A, Y, it can be reshaped to Xtem, Atem, Ytem
    #Atem = tf.placeholder(tf.float32, [n_input, n_input]) # 
    #Ytem = tf.placeholder(tf.float32, [n_input, 1]) #

    #Ypre = tf.placeholder(tf.float32, [None, n_output_vec])

    keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)

    # Store layers weight & bias
    weights = {
        'h1': tf.Variable(tf.random_normal([frequency, n_hidden_vec1]), dtype=np.float32),
        'h2': tf.Variable(tf.random_normal([n_hidden_vec1, n_hidden_vec2]), dtype=np.float32),
        'h3': tf.Variable(tf.random_normal([n_hidden_vec2, n_hidden_vec3]), dtype=np.float32),
        #'h4': tf.Variable(tf.random_normal([n_hidden_vec3, n_hidden_vec4])),
        'outg': tf.Variable(tf.random_normal([sn*n_hidden_vec1, n_hidden_vec4]), dtype=np.float32), 
        'out': tf.Variable(tf.random_normal([num_hidden, Y_whole.shape[1]]), dtype=np.float32), # dont forget to change n_hidden_vec1 when add/delete layers
        #'f1': tf.Variable(tf.random_normal([272*n_hidden_vec3, 100])),
        #'f2': tf.Variable(tf.random_normal([50, 10])),
        #'f3': tf.Variable(tf.random_normal([100, 272])),
        'A1': tf.Variable(tf.random_normal([n_input,n_input]), dtype=np.float32),
        'A2': tf.Variable(tf.random_normal([n_input,n_input]), dtype=np.float32),
        'A3': tf.Variable(tf.random_normal([n_input,n_input]), dtype=np.float32),
        #'A4': tf.Variable(tf.random_normal([n_input,n_input])),
        #'h1_wea': tf.Variable(tf.random_normal([9*frequency2, n_hidden_weather1])),
        #'out_wea': tf.Variable(tf.random_normal([n_hidden_weather1, n_input]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([1, n_hidden_vec1]), dtype=np.float32),# n_hidden_vec1])),# bias all the same??? dont forget to test
        'b2': tf.Variable(tf.random_normal([1, n_hidden_vec2]), dtype=np.float32), #n_hidden_vec2])),
        'b3': tf.Variable(tf.random_normal([1, n_hidden_vec3]), dtype=np.float32),#n_hidden_vec3])),
        #'b4': tf.Variable(tf.random_normal([n_input, n_hidden_vec4])),
        #'b1': tf.Variable(tf.random_normal([n_input,n_hidden_vec1])),# bias all the same??? dont forget to test
        #'b2': tf.Variable(tf.random_normal([n_input,n_hidden_vec2])),
        #'b3': tf.Variable(tf.random_normal([n_input,n_hidden_vec3])),
        #'bf1': tf.Variable(tf.random_normal([1, 100])), 
        #'bf2': tf.Variable(tf.random_normal([1, 10])), 
        #'bf3': tf.Variable(tf.random_normal([1, 272])), 
        'boutg': tf.Variable(tf.random_normal([1, n_hidden_vec4]), dtype=np.float32), 
        'bout': tf.Variable(tf.random_normal([Y_whole.shape[1]]), dtype=np.float32), 
        #'b1_wea': tf.Variable(tf.random_normal([1, n_hidden_weather1])), 
        #'bout_wea': tf.Variable(tf.random_normal([1, n_input])), 
    }
    
    with tf.variable_scope('lstm'):
        lstm = tf.contrib.rnn.core_rnn_cell.BasicLSTMCell(num_hidden, forget_bias=1.0, state_is_tuple=True)
        rnn_input_seq = tf.unstack(X, lstm_steps, 1) # lstm_steps is the 2nd variable

        for i in range(lstm_steps):
            rnn_input_seq[i] = gcn(rnn_input_seq[i], weights, biases, batch_size,n_input, frequency, 1, n_output_vec)
            #print (rnn_input_seq[i].shape)
        outputs, states = tf.contrib.rnn.static_rnn(lstm, rnn_input_seq, dtype=tf.float32)
        output_reshape = tf.reshape(outputs[-1], [-1, num_hidden])
        #print ('123here!!!!!!!!!!!')
        pred = tf.matmul(output_reshape, weights['out']) + biases['bout']
        #print (pred)
        #pred = tf.reshape(pred, [-1, Y_whole.shape[1]])
        #print ('here!!!!!!!!!!!')
        pred = scaler.inverse_transform(pred)
        Y_true_tr = scaler.inverse_transform(Y)

        cost = masked_mae_tf(pred, Y_true_tr, 0)
        #print (cost)
        
    with tf.variable_scope('lstm', reuse=True):
        rnn_input_seq_val = tf.unstack(X, lstm_steps, 1)

        for i in range(lstm_steps):
            rnn_input_seq_val[i] = gcn(rnn_input_seq_val[i], weights, biases, batch_size,n_input,frequency, 2, n_output_vec)
        outputs_val, states = tf.contrib.rnn.static_rnn(lstm, rnn_input_seq_val, dtype=tf.float32)
        output_reshape = tf.reshape(outputs_val[-1], [-1, num_hidden])

        pred_val = tf.matmul(output_reshape, weights['out']) + biases['bout']
        pred_val = scaler.inverse_transform(pred_val)
        Y_true_val = scaler.inverse_transform(Y)
        cost_val = masked_mae_tf(pred_val, Y_true_val, 0)
        #print ('234here!!!!!!!!!!!')
    with tf.variable_scope('lstm', reuse=True):
        rnn_input_seq_test = tf.unstack(X, lstm_steps, 1)
        
        for i in range(lstm_steps):
            rnn_input_seq_test[i] = gcn(rnn_input_seq_test[i], weights, biases, batch_size,n_input,frequency, 3, n_output_vec)
        outputs_test, states = tf.contrib.rnn.static_rnn(lstm, rnn_input_seq_test, dtype=tf.float32)
        output_reshape = tf.reshape(outputs_test[-1], [-1, num_hidden])

        pred_tes = tf.matmul(output_reshape, weights['out']) + biases['bout']
        pred_tes = scaler.inverse_transform(pred_tes)
        Y_true_tes = scaler.inverse_transform(Y)
        cost_tes = masked_mae_tf(pred_tes, Y_true_tes, 0)
        cost_tes_by_horizon = masked_mae_tf_by_horizon(pred_tes, Y_true_tes, 0)
        #print ('345here!!!!!!!!!!!')
    #rmse
    #cost_tes = tf.reduce_mean(tf.pow(pred_tes-Y, 2))
    # cross-entropy for classification
    # cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=Y_train))
    # ratio = tf.abs(tf.reduce_sum(pred)-tf.reduce_sum(Y))/tf.reduce_sum(Y)
    #zero = 0
    #ratio = tf.reduce_mean(tf.divide(tf.where(tf.not_equal(Y, zero), np.abs(pred-Y), tf.zeros(Y.get_shape(), tf.float32)), tf.where(tf.not_equal(Y, zero), Y, tf.ones(Y.get_shape(), tf.float32))))
    #optimizer = tf.train.RMSPropOptimizer(learning_rate, decay).minimize(cost)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Initializing the variables
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    #total_val_cost = []
    #total_val_ratio = []

    # learning start from 

    #index = daily_bike[(daily_bike['year'] == 2016) & (daily_bike['monthofyear'] == 1) & (daily_bike['dayofmonth'] == 1)].index.tolist()[0]
    #A_hat = normalize_adj(corr_matrix_trips)
    #print(A_hat)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init)

        for epoch in range(training_epochs):

            avg_cost = 0.
            total_batch = int(num_train/batch_size) #int(num_train/batch_size)

            for i in range(total_batch):
                #print (Y_training[i*batch_size:(i+1)*batch_size,].size())
                #num = batch_size
                _, c, preds, trueval = sess.run([optimizer, cost, pred, Y_true_tr], feed_dict={X: X_training[i*batch_size:(i+1)*batch_size,], 
                                                      Y: Y_training[i*batch_size:(i+1)*batch_size,],  
                                                              keep_prob: keep})
                #print (preds)
                #print (trueval)
                #print ("Epoch:", '%04d' % (epoch+1), "batch: ", i, "batch cost=", \
                #    "{:.9f}".format(c))
                #print ('here!!!!!!!!!!!!!!!!')
                avg_cost += c * batch_size #/ total_batch 
                #Display logs per epoch step
                
            # rest part of training dataset
            #num = num_train - total_batch*batch_size 
            if total_batch * batch_size != num_train:
                _, c, preds, trueval = sess.run([optimizer, cost, pred, Y_true_tr], feed_dict={X: X_training[total_batch*batch_size:num_train,], 
                                          Y: Y_training[total_batch*batch_size:num_train,],
                                                  keep_prob: keep})
                avg_cost += c * (num_train - total_batch*batch_size)
            
            avg_cost = avg_cost / num_train
            
            if epoch % display_step == 0:
                print ("Epoch:", '%04d' % (epoch+1), "Training MAE=", \
                    "{:.9f}".format(avg_cost)) #np.sqrt(avg_cost)
                
            # also use batch to save memory
            # validation
            c_val = 0.
            total_bat_val = int(num_val/batch_size)
            for i in range(total_bat_val):
                #num = batch_size
                c_val_b = sess.run([cost_val], feed_dict={X: X_val[i*batch_size:(i+1)*batch_size,], 
                                                          Y: Y_val[i*batch_size:(i+1)*batch_size,],   keep_prob:1})
                c_val += c_val_b[0]*batch_size
            
            if total_bat_val * batch_size != num_val:
                #num = num_val - total_bat_val*batch_size
                c_val_b = sess.run([cost_val], feed_dict={X: X_val[total_bat_val*batch_size:num_val,], 
                                                          Y: Y_val[total_bat_val*batch_size:num_val,],  keep_prob:1})
                c_val += c_val_b[0] * (num_val - total_bat_val*batch_size)
                
            c_val = c_val / num_val
            
            print("Validation MAE: ", c_val)
            
            # test
            c_tes = 0.
            c_tes_h = []
            total_bat_test = int(num_test/batch_size)
            
            pre_test_tem = [] # save the prediction results
            Y_tes_true = []
            
            for i in range(total_bat_test):
                #num = batch_size
                c_tes_b, pred_tes1, Y_tes_batch, cost_h = sess.run([cost_tes, pred_tes, Y_true_tes, cost_tes_by_horizon], feed_dict={X: X_test[i*batch_size:(i+1)*batch_size,],
                                                                               Y: Y_test[i*batch_size:(i+1)*batch_size,],  keep_prob: 1})
                c_tes += c_tes_b*batch_size
                c_tes_h.append([i*batch_size for i in cost_h])
                #print (cost_h)
                #print (pred_tes1.shape)
                pre_test_tem.append(pred_tes1)
                Y_tes_true.append(Y_tes_batch)
                
            if total_bat_test * batch_size != num_test:
                #num = num_test - total_bat_test*batch_size
                c_tes_b, pred_tes1, Y_tes_batch, cost_h= sess.run([cost_tes, pred_tes, Y_true_tes, cost_tes_by_horizon], feed_dict={X: X_test[total_bat_test*batch_size:num_test,],
                                                                               Y: Y_test[total_bat_test*batch_size:num_test,],  keep_prob: 1})
                c_tes += c_tes_b * (num_test - total_bat_test*batch_size) 
                c_tes_h.append([i * (num_test - total_bat_test*batch_size) for i in cost_h]) 
                
                #print (pred_tes1.shape)
                pre_test_tem.append(pred_tes1)
                Y_tes_true.append(Y_tes_batch)
                
            #print (c_tes_h.shape)   
            c_tes_h = np.array(c_tes_h)
            
            #print (c_tes_h.shape)
            pre_test_tem = np.concatenate(pre_test_tem, axis = 0)
            #print (pre_test_tem.shape)
            Y_tes_true = np.concatenate(Y_tes_true, axis = 0)
            
            c_tes = c_tes / num_test
            c_tes_h = np.sum(c_tes_h, axis = 0)
            c_tes_h = c_tes_h / num_test
            #c_tes_h = c_tes_h / num_test
            
            print("Test MAE: ", c_tes)
            #print("predic step: ", cost_by_hor)

            if c_val < best_val:
                best_val = c_val
                #saver.save(sess, './bikesharing_graph_2_th_point1')
                test_error = c_tes
                test_error_by_h = c_tes_h
                traing_error = avg_cost#np.sqrt(avg_cost)
                early_stop_k = 0 # reset to 0
                #print (pred_tes1)
                predic_res = pre_test_tem
                Y_true = Y_tes_true
                #predic_step = cost_by_hor

            # early stopping
            if c_val >= best_val:
                early_stop_k += 1

            # threshold
            if early_stop_k == early_stop_th:
              #  print ("early stopping...")
                break
            

        print("epoch is ", epoch)
        print("training error is ", traing_error)
        print("Optimization Finished! the lowest validation MAE is ", best_val)#(np.sqrt(best_val)))
        print("The test MAE is ", test_error)#(np.sqrt(test_error)))
    
    #test_Y = Y_test
    #test_error = np.sqrt(test_error)
    return test_error, test_error_by_h, predic_res, Y_true#, A1#, predic_step#, A2

In [14]:
# increas batch size from 100 to 1000: validation error 39.XX at about 100 epochs
# batch size to 50: validation error 6.XX at about 100 epochs
# add one more gcn layer: 8.XX
# increase learning rate to 0.01, batch size is 100: 10.XX
# decrease learning rate to 0.005, batch size is 100: 10.XX
# decrease learning rate to 0.005, batch size is 50, two hidden levels (same matrix):
    # 6.XX at 100th epoch
# decrease learning rate to 0.005, batch size is 50, one hidden level, hidden 40:
    # 6.XX at 100th epoch
# decrease learning rate to 0.005, batch size is 50, one hidden level, hidden 20:
    # drop much faster
    # 6.XX can be get, but kind of overfitting at 100th epoch?
# decrease learning rate to 0.005, batch size is 50, one hidden level, hidden 10:
    # drop much faster
    #  at 100th epoch
# learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #reduce prediction horizon from 12 to 1, normalize the data:
    # val: 3.16

# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #reduce prediction horizon from 12 to 1, normalize the data, normalize the symmetric adjacency matrix:
    # train: 3.24, val: 3.42
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #reduce prediction horizon from 12 to 1, normalize the data, normalize the symmetric adjacency matrix, 
    # train: 3.08, val: 3.26
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #reduce prediction horizon from 12 to 1, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper
    # train: 2.82, val: 2.75
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, one layer
    # train: 4.08, val: 3.99, test: 4.29 
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, two layers (same adjmatrix)
    # train: 4.10, val: 4.19, test: 4.54 
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, two layers (different adjmatrix)
    # train: 3.52, val: 3.62, test: 4.03
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, two layers (different adjmatrix), bias set as a vector of same length
    # train: 3.53, val: 3.58, test: 3.91 
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, three layers (three adjmatrix), bias set as a vector of same length
    # batch from 50 to 100
    # train: 3.36, val: 3.42, test: 3.82
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, three layers (three adjmatrix), bias set as a vector of same length
    # batch from 50 to 100
    # last hidden size 10 to 20
    # train: 3.33, val: 3.38, test: 3.71
    # train: 3.24, val: 3.31, test: 3.66
    # train: 3.22, val: 3.29, test: 3.66
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, three layers (three adjmatrix), bias set as a vector of same length
    # batch from 50 to 100
    # three hidden size 5, 5, 10
    # train: 3.5, val: 3.48, test: 3.82 
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, three layers (three adjmatrix), bias set as a vector of same length
    # batch from 50 to 100
    # three hidden size 10, 10, 20
    # learning rate: 0.005
    # train: 3.23, val: 3.34, test: 3.74
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, three layers (three adjmatrix), bias set as a vector of same length
    # batch from 50 to 200
    # three hidden size 10, 10, 20
    # learning rate: 0.01
    # shuffle
    # train: 3.19, val: 3.30, test: 3.67
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, three layers (three adjmatrix), bias set as a vector of same length
    # batch from 50 to 100
    # three hidden size 10, 10, 20
    # learning rate: 0.01
    # shuffle
    # train: 3.15, val: 3.26, test: 3.61
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, three layers (three adjmatrix), bias set as a vector of same length
    # batch from 50 to 100
    # three hidden size 10, 10, 20
    # learning rate: 0.01
    # shuffle
    # fully freedom adjacency matrix (not symmetric )
    # train: , val: , test: 
    # training cost around 6.15, not decreasing anymore
    
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, three layers (three adjmatrix), bias set as a vector of same length
    # batch from 50 to 100
    # three hidden size 10, 10, 20
    # learning rate: 0.005
    # change gradient decent algorithm
    # train: 3.19, val: 3.28, test: 3.67

# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, three layers (three adjmatrix), bias set as a vector of same length
    # batch from 50 to 100
    # three hidden size 10, 10, 20
    # learning rate: 0.005
    # change gradient decent algorithm
    # keep: 0.8
    # train: 3.30, val: 3.36, test: 3.78
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, three layers (three adjmatrix), bias set as a vector of same length
    # batch from 50 to 200
    # three hidden size 10, 10, 20
    # learning rate: 0.01
    # change gradient decent algorithm
    # keep: 1
    # train: 3.15, val: 3.27, test: 3.63
    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, three layers (three adjmatrix), bias set as a vector of same length
    # batch from 50 to 200
    # three hidden size 10, 10, 20
    # learning rate: 0.01
    # change gradient decent algorithm
    # keep: 1
    # bias not the same size
    # decreasing slow

    
# increase sample, learning rate to 0.01, batch size is 50, one hidden level, hidden 10, 
    #prediction horizon 12, normalize the data, normalize the symmetric adjacency matrix, 
    # remove "0" ground truths as the paper, three layers (three adjmatrix), bias set as a vector of same length
    # batch size 200
    # second size 10 to 10
    # last hidden size 10 to 30
    # set the same size may lead to indecreasing cost (need to increase batch size)
    # train: 3.05, val: 3.22, test: 3.73

# batch size 150 really slow


In [15]:
# change hidden number
# change batch_size
# set size
#from bayes_opt import BayesianOptimization
import datetime
#freq_max = 12
#time_step = 12
learning_rate = 0.002
decay = 0.9 
batch_size = 500
num_hidden = 100 # number of hiddent units in LSTM Cell
early_stop_th = 150
training_epochs = 500
keep = 1#0.2
#time_step_max = 10

sn = 207 # station num
#test = 2000 
reg1 = 0.05#0.05
reg2 = 0.05#0.1
reg3 = 0.05
frequency = 12
horizon = 12
lstm_steps = 3  # number of lstm cells
n_hidden_vec1 = 5
n_hidden_vec2 = 5
n_hidden_vec3 = 10
n_hidden_vec4 = 10
#num = 0
#All_pred = np.empty([2000, 207])
#All_Y = np.empty([2000, 207])

#24*90
#step = 0
#gap = 100
#training = 0.7
#validation = 0.1
#test = 0.2

#gcn_corr_eval(7, 0.01, 0.5, 100, 0.4, 10, 5, 5, 0.2, 50, 500)



rep = 1 # repeating times

#total_sn = 0
#num_iter = 50
#init_points = 200


# stdbscan
#spatial_threshold = 300
#temporal_threshold = 300
#min_neighbors = 1 # number of neighbor

#frequency2 = skip1 + freq_max + training

#while step < 2000:

#hourly_bike_cluster = hourly_bike
best = -10000
pre_best = []
test_Y_best = []
test_error_best = 1000
A1_best = []
# A2_best = []
for i in range(rep):
    a = datetime.datetime.now()
    test_error, test_error_by_h, predic_res, Y_true = gcn_corr_final(frequency, horizon, learning_rate, decay, batch_size, n_hidden_vec1,
                                                                n_hidden_vec2, n_hidden_vec3, keep, early_stop_th, training_epochs, reg1, reg2)
    #val_error, predic_res, test_Y,test_error=gcn_corr_final(a['frequency'], a['learning_rate'], a['decay'], a['batch_size'], a['n_hidden_vec1'], a['n_hidden_vec2'], a['n_hidden_vec3'], a['keep'], a['early_stop_th'], a['training_epochs'], a['reg'])
    #print ("finished A running: ", i)
    b = datetime.datetime.now()
    print(b-a)
    

    #total_sn = total_sn + sn

    #total_error = np.sqrt(np.mean((All_pred[0:(step+gap),0:total_sn] - All_Y[0:(step+gap),0:total_sn])**2))

    #print ("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!the cluster now is: ", c)
    #print ("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!the val error of this cluster now is: ", best)
    #print ("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!the test error by this cluster now is: ", total_error)
    #step = step + gap
    #skip1 = skip1 + gap
    
    #np.savetxt("prediction_2.csv", All_pred, delimiter = ',')
    #np.savetxt("prediction_Y_2.csv", All_Y, delimiter = ',')
    

Epoch: 0001 Training MAE= 18.955187484
Validation MAE:  17.544981991287564
Test MAE:  17.634381224489818
Epoch: 0002 Training MAE= 16.356737253
Validation MAE:  15.50623103649947
Test MAE:  15.634097727707205
Epoch: 0003 Training MAE= 14.209621498
Validation MAE:  13.219823266467909
Test MAE:  13.360854074509062
Epoch: 0004 Training MAE= 11.915199907
Validation MAE:  10.835028181980997
Test MAE:  11.021952507808829
Epoch: 0005 Training MAE= 9.680294984
Validation MAE:  8.764116903291132
Test MAE:  9.046445886798802
Epoch: 0006 Training MAE= 7.949668890
Validation MAE:  7.35534468880535
Test MAE:  7.72331289640046
Epoch: 0007 Training MAE= 6.749745949
Validation MAE:  6.373406065641529
Test MAE:  6.851251471214946
Epoch: 0008 Training MAE= 6.123223132
Validation MAE:  5.836713625566802
Test MAE:  6.365676527459145
Epoch: 0009 Training MAE= 5.738890978
Validation MAE:  5.454891248341024
Test MAE:  6.022880807481973
Epoch: 0010 Training MAE= 5.519672534
Validation MAE:  5.288546871964949


Test MAE:  4.169394360543829
Epoch: 0082 Training MAE= 3.613466983
Validation MAE:  3.6988931241696767
Test MAE:  4.162210484458889
Epoch: 0083 Training MAE= 3.611257438
Validation MAE:  3.7013837038165462
Test MAE:  4.1569932807290755
Epoch: 0084 Training MAE= 3.605199549
Validation MAE:  3.6964908551125633
Test MAE:  4.164220635952575
Epoch: 0085 Training MAE= 3.596570208
Validation MAE:  3.6893265699818185
Test MAE:  4.158429555465469
Epoch: 0086 Training MAE= 3.585662451
Validation MAE:  3.683890911784485
Test MAE:  4.157954182585133
Epoch: 0087 Training MAE= 3.583276554
Validation MAE:  3.6893146542737085
Test MAE:  4.157206506446463
Epoch: 0088 Training MAE= 3.573200410
Validation MAE:  3.68391868667881
Test MAE:  4.159530275737235
Epoch: 0089 Training MAE= 3.566714978
Validation MAE:  3.673955651095314
Test MAE:  4.147176089922448
Epoch: 0090 Training MAE= 3.556245725
Validation MAE:  3.6605258544866186
Test MAE:  4.139210642437149
Epoch: 0091 Training MAE= 3.550389768
Validatio

Test MAE:  4.075378430082878
Epoch: 0162 Training MAE= 3.196614629
Validation MAE:  3.629597508994332
Test MAE:  4.082352320074705
Epoch: 0163 Training MAE= 3.193370301
Validation MAE:  3.6297543814582545
Test MAE:  4.076850642982513
Epoch: 0164 Training MAE= 3.187407845
Validation MAE:  3.629372173852294
Test MAE:  4.077990787362405
Epoch: 0165 Training MAE= 3.184778433
Validation MAE:  3.632646974855966
Test MAE:  4.086093522663759
Epoch: 0166 Training MAE= 3.182060739
Validation MAE:  3.628629352054457
Test MAE:  4.080729696276867
Epoch: 0167 Training MAE= 3.175691152
Validation MAE:  3.62979444274067
Test MAE:  4.080629266392734
Epoch: 0168 Training MAE= 3.170243434
Validation MAE:  3.6207374934732477
Test MAE:  4.074622135298812
Epoch: 0169 Training MAE= 3.164154235
Validation MAE:  3.6247631998827856
Test MAE:  4.0754569826169265
Epoch: 0170 Training MAE= 3.160014201
Validation MAE:  3.612976735525758
Test MAE:  4.068857653469287
Epoch: 0171 Training MAE= 3.155515054
Validation M

Test MAE:  4.142405908879511
Epoch: 0242 Training MAE= 2.986728167
Validation MAE:  3.6623953171890147
Test MAE:  4.137704724760051
Epoch: 0243 Training MAE= 2.989374341
Validation MAE:  3.662203204022707
Test MAE:  4.1336344770313715
Epoch: 0244 Training MAE= 2.993136185
Validation MAE:  3.6595270825128488
Test MAE:  4.140192471450046
Epoch: 0245 Training MAE= 2.993177050
Validation MAE:  3.6533530733011066
Test MAE:  4.121679046342348
Epoch: 0246 Training MAE= 2.982102280
Validation MAE:  3.645264839603953
Test MAE:  4.114524863169722
Epoch: 0247 Training MAE= 2.974687314
Validation MAE:  3.644863633343773
Test MAE:  4.110593149619897
Epoch: 0248 Training MAE= 2.969446421
Validation MAE:  3.648818611228553
Test MAE:  4.115874871944473
Epoch: 0249 Training MAE= 2.965332544
Validation MAE:  3.6501851760557966
Test MAE:  4.125029935264504
Epoch: 0250 Training MAE= 2.961781953
Validation MAE:  3.6492067444933594
Test MAE:  4.118708355441757
Epoch: 0251 Training MAE= 2.957428564
Validatio

KeyboardInterrupt: 

In [187]:
test_error_by_h

array([2.97430119, 2.94902541, 2.92731638, 2.90907956, 2.89517942,
       2.88547814, 2.8794575 , 2.8779491 , 2.88067882, 2.88815727,
       2.90016863, 2.91680584])

In [189]:
np.savetxt("../../data/trajectory/lstm_gcnn_prediction_2.9168.csv", predic_res, delimiter = ',')
np.savetxt("../../data/trajectory/lstm_gcnn_prediction_2.9168_Y.csv", Y_true, delimiter = ',')
np.savetxt("../../data/trajectory/lstm_gcnn_prediction_2.9168by_horizon.csv", test_error_by_h, delimiter = ',')

In [None]:
# increase batchsize from 500 to 1000,
    #no luck
# decrease batchsize from 500 to 100, 
    # 0/2
# batchsize 500
    # 1/2 low, 3.01, 2.87, 3.16
    # hidden state 100 1/3 3.07, 2.94, 3.21
    # reduce learning rate from 0.01 to 0.005: 3/4 3.20, 3.06, 3.34
                                                   3.20, 3.08, 3.34
                                        500 epochs 3.00, 2.88, 3.12
    # hidden 3 from 20 to 10: 4/4
    # increase lstm steps from 1 to 3: 1/1: 2.85, 2.73, 3.00
    # hidden 1 and 2 are 5, hidden 3 10: 1/1: 2.86, 2.75, 2.99
    # hidden 4 10, no output layer for gcn: 1/1: 2.74, 2.66, 2.93
    # reduce learning rate from 0.005 to 0.002: 6/6: 2.80, 2.73, 2.93
                                                     2.79, 2.71, 2.92
                                                     2.74, 2.68, 2.89
                                                     2.82, 2.74, 2.96
                                                     2.83, 2.73, 2.98
                                                     2.77, 2.69, 2.94
                                                     2.78, 2.68, 2.01