In [1]:
import math
import tensorflow as tf
import numpy as np
import matplotlib
import os
from tensorflow.contrib import rnn

# Clear the default graph (useful when this cell is re-run in the same kernel).
tf.reset_default_graph()
# Fix the graph-level random seed.
tf.set_random_seed(777)

# Backend selection is done here, before pyplot is imported further down.
# When no DISPLAY environment variable is set, TkAgg is selected explicitly.
if "DISPLAY" not in os.environ:
    matplotlib.use('TkAgg')

import matplotlib.pyplot as plt

def MinMaxScaler(data):
    """Rescale every column of ``data`` into the range [0, 1).

    The minimum and maximum are taken along axis 0 (per column). A small
    constant (1e-7) is added to the column range so that a constant column
    yields zeros instead of a division by zero.

    Args:
        data: array-like numeric data; statistics are computed along axis 0.

    Returns:
        Array of the same shape as ``data`` holding the scaled values.
    """
    col_min = np.min(data, 0)
    col_range = np.max(data, 0) - col_min
    return (data - col_min) / (col_range + 1e-7)

# Directory holding the taxi data files. It can be overridden through the
# TAXI_DATA_DIR environment variable; the default is the original location,
# so existing setups keep working unchanged.
data_dir = os.environ.get('TAXI_DATA_DIR', '/Users/yeseo/Desktop/taxi_data')

# Both files are read as comma-separated text.
xy = np.genfromtxt(os.path.join(data_dir, 'City_Counted_TaxiMach_Link_Dataset_Full_201501 - 12.txt'), delimiter=',', dtype=None)
xy_with_noise = np.genfromtxt(os.path.join(data_dir, '2015eliminated_1.txt'), delimiter=',', dtype=None)

# Pre-processing: keep the first 27 columns, leave the first two columns
# untouched and min-max scale the remaining ones column by column.
xy = xy[:, :27]
a = xy[:, :2]
b = xy[:, 2:]
b = MinMaxScaler(b)
xy = np.hstack((a, b))

# Same pre-processing for the second dataset. It is scaled with its own
# column minima/maxima, independently of the first dataset.
xy_with_noise = xy_with_noise[:, :27]
a_with_noise = xy_with_noise[:, :2]
b_with_noise = xy_with_noise[:, 2:]
b_with_noise = MinMaxScaler(b_with_noise)
xy_with_noise = np.hstack((a_with_noise, b_with_noise))


#parameters
seq_length =6          # number of consecutive rows in each input window
data_dim =27           # columns per input row (27 columns are kept during pre-processing)
hidden_dim = 54        # base LSTM layer width (the middle layer uses twice this)
output_dim = 25        # size of the prediction produced by the final dense layer
learning_rate = 0.01   # passed to the Adam optimizers
iterations = 4000      # number of training steps run for each model

# Split sizes are derived from the length of xy only:
# the first 70% of rows for training, the next 20% for validation.
train_size = int(len(xy)*0.7)
validation_size = int(len(xy)*0.2)

# Partition each dataset, in row order, into train / validation / test:
# rows before train_size, the following validation_size rows, and the rest.
train_set, validation_set, test_set = np.split(
    xy, [train_size, train_size + validation_size])

# The second dataset is cut at the same row boundaries.
train_set_with_noise, validation_set_with_noise, test_set_with_noise = np.split(
    xy_with_noise, [train_size, train_size + validation_size])

# build data set for rnn
def build_dataset(time_series, seq_length):
    """Turn a 2-D series into sliding input windows and next-row targets.

    For every start row ``s`` the input is the ``seq_length`` rows beginning
    at ``s`` (all columns) and the target is the row right after that window,
    restricted to the columns from index 2 onward.

    Args:
        time_series: 2-D array indexed as [row, column].
        seq_length: number of rows in each input window.

    Returns:
        Tuple ``(inputs, targets)`` of NumPy arrays built from the collected
        windows and target rows. Both are empty when the series has no more
        than ``seq_length`` rows.
    """
    n_windows = len(time_series) - seq_length
    windows = [time_series[s:s + seq_length, :] for s in range(n_windows)]
    targets = [time_series[s + seq_length, 2:] for s in range(n_windows)]
    return np.array(windows), np.array(targets)

# Build the train / validation / test windows and targets for the first dataset
trainX, trainY = build_dataset(train_set,seq_length)
validationX, validationY = build_dataset(validation_set,seq_length)
testX,testY = build_dataset(test_set, seq_length)

# Same windows and targets for the second ("with_noise") dataset
trainX_with_noise, trainY_with_noise = build_dataset(train_set_with_noise,seq_length)
validationX_with_noise, validationY_with_noise = build_dataset(validation_set_with_noise,seq_length)
testX_with_noise,testY_with_noise = build_dataset(test_set_with_noise, seq_length)


# Inputs and targets of the first model: windows of seq_length rows with
# data_dim columns each, and one target row of output_dim values per window.
X1 = tf.placeholder(tf.float32, [None, seq_length, data_dim])
Y1 = tf.placeholder(tf.float32, [None, output_dim])


# Inputs and targets of the second ("with_noise") model, same shapes.
X2 = tf.placeholder(tf.float32, [None, seq_length, data_dim])
Y2 = tf.placeholder(tf.float32, [None, output_dim])

# Build the LSTM cells

# First model: three stacked LSTM layers (hidden_dim, 2*hidden_dim, hidden_dim units)
# fed by X1, followed by a linear dense layer on the output of the last time step.
with tf.variable_scope("rnn1"):
    lstm_1_1 = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_dim, state_is_tuple = True, activation=tf.tanh)
    lstm_1_2 = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_dim*2, state_is_tuple = True, activation=tf.tanh)
    lstm_1_3 = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_dim, state_is_tuple = True, activation=tf.tanh)
    cell1 = tf.contrib.rnn.MultiRNNCell([lstm_1_1,lstm_1_2,lstm_1_3])
    outputs1,_states1 = tf.nn.dynamic_rnn(cell1,X1,dtype = tf.float32)
    # Only the output at the final time step is passed to the dense layer (no activation).
    Y_pred = tf.contrib.layers.fully_connected(outputs1[:,-1], output_dim,activation_fn = None)
    # Sum (not mean) of squared errors over all fed samples and outputs.
    loss1 =tf.reduce_sum(tf.square(Y_pred-Y1))
    train1 = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss1)

# Second model: same architecture under its own variable scope, fed by X2 / Y2.
with tf.variable_scope("rnn2"):
    lstm_2_1 = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_dim, state_is_tuple = True, activation=tf.tanh)
    lstm_2_2 = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_dim*2, state_is_tuple = True, activation=tf.tanh)
    lstm_2_3 = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_dim, state_is_tuple = True, activation=tf.tanh)
    cell2 = tf.contrib.rnn.MultiRNNCell([lstm_2_1,lstm_2_2,lstm_2_3])
    outputs2,_states2 = tf.nn.dynamic_rnn(cell2, X2, dtype = tf.float32)
    Y_pred_with_noise = tf.contrib.layers.fully_connected(outputs2[:,-1], output_dim,activation_fn = None)

    # Sum of squared errors, as for the first model.
    loss2 =tf.reduce_sum(tf.square(Y_pred_with_noise-Y2))
    train2 = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss2)


# RMSE measurement: root of the mean squared difference between the fed
# targets and predictions, each holding output_dim values per row.
targets = tf.placeholder(tf.float32, [None, output_dim])
predictions = tf.placeholder(tf.float32, [None, output_dim])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))

# X positions 1..25 for per-output plots; x2 and x3 are the same positions
# shifted right by 0.3 and 0.6 so several series can be drawn side by side.
x1 = np.arange(1, 26)
x2 = x1 + 0.3
x3 = x2 + 0.3
# One x position and one loss slot per training iteration.
x4 = np.arange(iterations)
loss_for_graph = np.zeros(iterations)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Each iteration performs one training step per model, feeding the whole
    # training set at once. Losses are printed every 200 iterations.
    for i in range(iterations):
        _, step_loss1 = sess.run([train1,loss1],feed_dict={X1:trainX, Y1:trainY})
        if i%200 == 0:
            print("[step: {}] loss: {}".format(i,step_loss1))
        # Only the first model's loss is recorded for the curve plotted below.
        loss_for_graph[i] = step_loss1
        _, step_loss2 = sess.run([train2,loss2],feed_dict={X2:trainX_with_noise, Y2:trainY_with_noise})
        if i%200 == 0:
            print("[step: {}] loss: {}".format(i,step_loss2))

    validation_predict = sess.run(Y_pred, feed_dict = {X1:validationX})
    validation_predict_with_noise = sess.run(Y_pred_with_noise, feed_dict = {X2:validationX_with_noise})
    # test_predict is kept because later cells inspect and plot it.
    test_predict = sess.run(Y_pred, feed_dict = {X1:testX})

    # RMSE on the validation split. The predictions fed here come from the
    # same split as the targets; previously validationY was compared with
    # test-set predictions, and the second line referenced a name
    # (test_predict_with_noise) that was never defined.
    # NOTE(review): the second model is scored against validationY (targets
    # of the first dataset), as the original code did. Switch to
    # validationY_with_noise if it should be scored against its own targets.
    rmse_val = sess.run(rmse, feed_dict={targets: validationY,predictions: validation_predict})
    rmse_val_with_noise = sess.run(rmse, feed_dict={targets: validationY,predictions: validation_predict_with_noise})
    print("RMSE: {} \nRMSE_with_noise{} ".format(rmse_val,rmse_val_with_noise))

    # Training-loss curve of the first model.
    plt.plot(x4,loss_for_graph)
    plt.show()




Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').
[step: 0] loss: 11784.171875
[step: 0] loss: 13367.23828125
[step: 200] loss: 112.70023345947266
[step: 200] loss: 491.4951171875
[step: 400] loss: 65.13475036621094
[step: 400] loss: 454.6947021484375
[step: 600] loss: 52.768577575683594
[step: 600] loss: 457.9237365722656
[step: 800] loss: 36.422340393066406
[step: 800] loss: 430.6690979003906
[step: 1000] loss: 28.350595474243164
[step: 1000] loss: 420.5740966796875
[step: 1200] loss: 18.968524932861328
[step: 1200] loss: 400.88330078125
[step: 1400] loss: 25.64075469970703
[step: 1400] loss: 378.34747314453125
[step: 1600] loss: 15.255483627319336
[step: 1600] loss: 359.75958251953125
[step: 1800] loss: 13.065579414367676
[step: 1800] loss: 328.6620178222656


KeyboardInterrupt: 

In [2]:
# Smallest recorded training loss of the first model. loss_for_graph starts as
# zeros, so slots of iterations that never ran (e.g. after an interrupt) stay 0.
np.min(loss_for_graph)

3.5981075763702393

In [3]:
# Targets of the test windows built from the first dataset.
testY

array([[0.57102562, 0.56416147, 0.66542543, ..., 0.55216831, 0.53369132,
        0.42090063],
       [0.56083993, 0.52221858, 0.6309812 , ..., 0.5217368 , 0.50736186,
        0.40806503],
       [0.58951657, 0.54230391, 0.69744926, ..., 0.528231  , 0.46830071,
        0.40346561],
       ...,
       [0.50991146, 0.45106662, 0.89880842, ..., 0.61324603, 0.69877014,
        0.70360466],
       [0.55128105, 0.47528717, 0.99050456, ..., 0.69251825, 0.80997748,
        0.81880415],
       [0.55323983, 0.44115523, 0.97616831, ..., 0.68076428, 0.80374156,
        0.80222484]])

In [4]:
# Test-set predictions of the first model; assigned inside the training
# session cell, after the training loop has finished.
test_predict

array([[0.58363825, 0.6017132 , 0.6577516 , ..., 0.56151175, 0.52582777,
        0.3907143 ],
       [0.5401327 , 0.54166913, 0.61143273, ..., 0.51359236, 0.5054405 ,
        0.36804444],
       [0.58308345, 0.5689557 , 0.6982064 , ..., 0.5376862 , 0.5538894 ,
        0.4306469 ],
       ...,
       [0.5702312 , 0.5599937 , 0.91931725, ..., 0.65527546, 0.7850928 ,
        0.7593383 ],
       [0.5282676 , 0.50003135, 0.86919653, ..., 0.6219486 , 0.7859508 ,
        0.7667056 ],
       [0.55512637, 0.53106725, 0.8721303 , ..., 0.64725614, 0.8036661 ,
        0.7901548 ]], dtype=float32)

In [7]:
# Compare the first test sample: target values against the first model's prediction.
# x2 is x1 shifted by 0.3, so the predicted curve is drawn slightly to the right.
plt.plot(x1,testY[0,:])
plt.plot(x2,test_predict[0,:])
plt.show()
plt.close()

KeyboardInterrupt: 

In [8]:
def correlation(data1,data2):
    """Print the Pearson correlation coefficient between two arrays.

    Each input is centred on its own overall mean (taken over all elements),
    and the coefficient is the sum of the element-wise products of the
    deviations divided by the product of their root-sum-of-squares.

    Args:
        data1: numeric array.
        data2: numeric array that multiplies element-wise with ``data1``.

    Returns:
        None. The coefficient is only printed.
    """
    dev1 = data1 - np.mean(data1)
    dev2 = data2 - np.mean(data2)

    # Numerator: sum of products of the deviations.
    cross_sum = (dev1 * dev2).sum()

    # Denominator factors: root of the summed squared deviations of each input.
    norm1 = np.sqrt(np.square(dev1).sum())
    norm2 = np.sqrt(np.square(dev2).sum())

    # Correlation coefficient.
    coefficient = cross_sum / (norm1 * norm2)

    print("correlation(real&predict) :{}".format(coefficient))
    
# Correlation between the test targets and the first model's test predictions.
correlation(testY,test_predict)

correlation(real&predict) :0.9799699580363014
