In [12]:
import tensorflow as tf
import numpy as np
import matplotlib
import os
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime  
tf.set_random_seed(77)
columns=['date','sales']

txs=pd.read_table('./lstmData/lstmPrac4.csv', sep=',',header=None,names=columns )
sales=list(txs['sales'])

def noOutlierSales(sales):
    mean=np.mean(sales)
    std=np.std(sales)
    for i in range(len(sales)):
        if (sales[i]<mean-2*std or sales[i]>mean+2*std):
             sales[i]=int(mean)
    return sales
def logSales(sales):
    for i in range(len(sales)):
        if sales[i] is 0:
            sales[i]=1
    return np.log(sales)
def sqrtSales(sales):
    return np.sqrt(sales)
def minMaxNormalizer(data):
    numerator=data-np.min(data)
    denominator=np.max(data)-np.min(data)
    return numerator/(denominator+1e-7)
def minMaxDeNormalizer(data, originalData):
    shift=np.min(originalData)
    multiplier=np.max(originalData)-np.min(originalData)
    return (data+shift)*multiplier 
def rootMeanSquaredError(a,b):
    sum=0
    for i in range(len(a)):
        sum=sum+(a[i]-b[i])**2
    return np.sqrt( sum/len(a))

In [13]:
year = lambda x: datetime.strptime(x, "%Y-%m-%d" ).year  
day_of_week = lambda x: datetime.strptime(x, "%Y-%m-%d" ).weekday()
month = lambda x: datetime.strptime(x, "%Y-%m-%d" ).month

week_number = lambda x: datetime.strptime(x, "%Y-%m-%d" ).strftime('%V')
txs['year'] = txs['date'].map(year)
txs['month']=txs['date'].map(month)
txs['week_number']=txs['date'].map(week_number)
txs['day_of_week']=txs['date'].map(day_of_week)

seasons = [0,0,1,1,1,2,2,2,3,3,3,0] #dec - feb is winter, then spring, summer, fall etc
season = lambda x: seasons[(datetime.strptime(x, "%Y-%m-%d" ).month-1)]
day_of_week01s=[0,0,0,0,0,1,1]
day_of_week01= lambda x: day_of_week01s[(datetime.strptime(x, "%Y-%m-%d" ).weekday())]
txs['season']=txs['date'].map(season)
txs['day_of_week01']=txs['date'].map(day_of_week01)

originalSales=list(txs['sales'])
sales=list(txs['sales'])

In [14]:
# sales=noOutlierSales(sales)
tempxy=[list(txs['season']),list(txs['day_of_week']),list(txs['week_number']),sales]
# tempxy=[list(txs['season']),list(txs['day_of_week01']),list(txs['week_number']),sales]
xy=np.array(tempxy).transpose().astype(np.float)
originalxy=np.array(tempxy).transpose().astype(np.float)
xy=minMaxNormalizer(xy)

#data_dim은 y값 도출을 위한 feature 가지수+1(독립변수 가지수 +1(y포함))
data_dim=4
#data_dim크기의 data 한 묶음이 seq_length만큼 input으로 들어가
seq_length=5
#output_dim(=forecastDays)만큼의 다음날 y_data를 예측
forecastDays=7
output_dim=forecastDays
#hidden_dim은 정말 임의로 설정
hidden_dim=100
#learning rate은 배우는 속도(너무 크지도, 작지도 않게 설정)
learning_rate=0.01
#iterations는 반복 횟수
iterations=1000
x=xy
y=xy[:,[-1]]

#build a series dataset(seq_length에 해당하는 전날 X와 다음 forecastDays에 해당하는 Y)
dataX=[]
dataY=[]
for i in range(0, len(y)-seq_length):
    _x=x[i:i+seq_length]
    _y=y[i+seq_length]
    #     _y=Y[i+seq_length:i+seq_length+forecastDays]
    print(_x,"->",_y)
    dataX.append(_x)
    dataY.append(_y)
    train_size = int(len(dataY) * 0.7)
    
test_size = len(dataY) - train_size
trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:])
trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:])
X=tf.placeholder(tf.float32, [None, seq_length, data_dim])
Y=tf.placeholder(tf.float32, [None, forecastDays])

cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_dim, state_is_tuple=True, activation=tf.tanh)
outputs, _states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
Y_pred = tf.contrib.layers.fully_connected(outputs[:, -1], output_dim, activation_fn= None) 
loss = tf.reduce_sum(tf.square(Y_pred - Y))  # sum of the squares
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
denormalizedTestY=originalSales[train_size+seq_length:]
# denormalizedTestY_original=sales[train_size+seq_length:]
denormalizedTestY_feed=np.array([[i] for i in denormalizedTestY])
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
with tf.Session() as sess:
    #초기화
    init = tf.global_variables_initializer()
    sess.run(init)

    # Training step
    for i in range(iterations):
        _, step_loss = sess.run([train, loss], feed_dict={X: trainX, Y: trainY})
        print("[step: {}] loss: {}".format(i, step_loss))

    # Test step
    test_predict = minMaxDeNormalizer(sess.run(Y_pred, feed_dict={X: testX}),originalxy)
    rmse_val = sess.run(rmse, feed_dict={targets: denormalizedTestY_feed, predictions: test_predict})
    print("RMSE: {}".format(rmse_val))

    # Plot predictions
    plt.plot(denormalizedTestY_feed) #실제 sales 파란색
    plt.plot(test_predict)           #예측 sales 주황색
    plt.xlabel("Time Period")
    plt.ylabel("Stock Price")
    plt.show()

[[ 0.          0.02777778  0.36805556  0.23611111]
 [ 0.          0.03472222  0.36805556  0.28472222]
 [ 0.          0.04166667  0.36805556  0.375     ]
 [ 0.          0.          0.00694444  0.28472222]
 [ 0.          0.00694444  0.00694444  0.24305556]] -> [ 0.30555556]
[[ 0.          0.03472222  0.36805556  0.28472222]
 [ 0.          0.04166667  0.36805556  0.375     ]
 [ 0.          0.          0.00694444  0.28472222]
 [ 0.          0.00694444  0.00694444  0.24305556]
 [ 0.          0.01388889  0.00694444  0.30555556]] -> [ 0.34722222]
[[ 0.          0.04166667  0.36805556  0.375     ]
 [ 0.          0.          0.00694444  0.28472222]
 [ 0.          0.00694444  0.00694444  0.24305556]
 [ 0.          0.01388889  0.00694444  0.30555556]
 [ 0.          0.02083333  0.00694444  0.34722222]] -> [ 0.29166667]
[[ 0.          0.          0.00694444  0.28472222]
 [ 0.          0.00694444  0.00694444  0.24305556]
 [ 0.          0.01388889  0.00694444  0.30555556]
 [ 0.          0.02083333  0

[[ 0.          0.          0.0625      0.24305556]
 [ 0.00694444  0.00694444  0.0625      0.20833333]
 [ 0.00694444  0.01388889  0.0625      0.25      ]
 [ 0.00694444  0.02083333  0.0625      0.21527778]
 [ 0.00694444  0.02777778  0.0625      0.21527778]] -> [ 0.30555556]
[[ 0.00694444  0.00694444  0.0625      0.20833333]
 [ 0.00694444  0.01388889  0.0625      0.25      ]
 [ 0.00694444  0.02083333  0.0625      0.21527778]
 [ 0.00694444  0.02777778  0.0625      0.21527778]
 [ 0.00694444  0.03472222  0.0625      0.30555556]] -> [ 0.28472222]
[[ 0.00694444  0.01388889  0.0625      0.25      ]
 [ 0.00694444  0.02083333  0.0625      0.21527778]
 [ 0.00694444  0.02777778  0.0625      0.21527778]
 [ 0.00694444  0.03472222  0.0625      0.30555556]
 [ 0.00694444  0.04166667  0.0625      0.28472222]] -> [ 0.28472222]
[[ 0.00694444  0.02083333  0.0625      0.21527778]
 [ 0.00694444  0.02777778  0.0625      0.21527778]
 [ 0.00694444  0.03472222  0.0625      0.30555556]
 [ 0.00694444  0.04166667  0

 [ 0.01388889  0.02083333  0.22916667  0.22222222]] -> [ 0.14583333]
[[ 0.01388889  0.          0.22916667  0.17361111]
 [ 0.01388889  0.00694444  0.22916667  0.22222222]
 [ 0.01388889  0.01388889  0.22916667  0.20138889]
 [ 0.01388889  0.02083333  0.22916667  0.22222222]
 [ 0.01388889  0.02777778  0.22916667  0.14583333]] -> [ 0.63888889]
[[ 0.01388889  0.00694444  0.22916667  0.22222222]
 [ 0.01388889  0.01388889  0.22916667  0.20138889]
 [ 0.01388889  0.02083333  0.22916667  0.22222222]
 [ 0.01388889  0.02777778  0.22916667  0.14583333]
 [ 0.01388889  0.03472222  0.22916667  0.63888889]] -> [ 0.13194444]
[[ 0.01388889  0.01388889  0.22916667  0.20138889]
 [ 0.01388889  0.02083333  0.22916667  0.22222222]
 [ 0.01388889  0.02777778  0.22916667  0.14583333]
 [ 0.01388889  0.03472222  0.22916667  0.63888889]
 [ 0.01388889  0.04166667  0.22916667  0.13194444]] -> [ 0.25694444]
[[ 0.01388889  0.02083333  0.22916667  0.22222222]
 [ 0.01388889  0.02777778  0.22916667  0.14583333]
 [ 0.01388

ValueError: Variable rnn/basic_lstm_cell/kernel already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:

  File "c:\users\lg\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__
    self._traceback = _extract_stack()
  File "c:\users\lg\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "c:\users\lg\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
    op_def=op_def)


In [4]:
denormalizedTestPredictY=[item for sublist in test_predict for item in sublist]

In [5]:
rootMeanSquaredError(denormalizedTestY,denormalizedTestPredictY)

25.156532107932779

In [11]:
list(np.round(denormalizedTestPredictY))

[15.0,
 15.0,
 44.0,
 13.0,
 23.0,
 24.0,
 22.0,
 19.0,
 17.0,
 14.0,
 28.0,
 27.0,
 -20.0,
 -15.0,
 15.0,
 15.0,
 22.0,
 30.0,
 20.0,
 14.0,
 11.0,
 13.0,
 9.0,
 31.0,
 33.0,
 57.0,
 14.0,
 16.0,
 13.0,
 13.0,
 7.0,
 10.0,
 9.0,
 9.0,
 9.0,
 10.0,
 2.0,
 7.0,
 7.0,
 8.0,
 10.0,
 11.0,
 7.0,
 1.0,
 6.0,
 4.0,
 5.0,
 6.0,
 7.0,
 9.0,
 1.0,
 8.0,
 8.0,
 3.0,
 3.0,
 5.0,
 7.0,
 1.0,
 6.0,
 10.0,
 12.0,
 18.0,
 19.0,
 22.0,
 20.0,
 12.0,
 18.0,
 23.0,
 29.0,
 13.0,
 20.0,
 13.0,
 23.0,
 19.0,
 17.0,
 15.0,
 18.0,
 15.0,
 8.0,
 13.0,
 14.0,
 12.0,
 12.0,
 14.0,
 18.0,
 8.0,
 18.0,
 15.0,
 11.0,
 21.0,
 27.0,
 34.0,
 46.0,
 79.0,
 60.0,
 52.0,
 62.0,
 73.0,
 26.0,
 27.0,
 106.0,
 17.0,
 23.0,
 11.0,
 30.0,
 32.0,
 36.0,
 73.0,
 51.0,
 51.0,
 60.0,
 52.0,
 32.0,
 42.0,
 64.0,
 44.0,
 42.0,
 70.0]

In [7]:
denormalizedTestY

[25,
 27,
 25,
 21,
 28,
 24,
 21,
 29,
 121,
 1,
 7,
 32,
 19,
 34,
 28,
 23,
 15,
 10,
 15,
 9,
 78,
 59,
 29,
 23,
 12,
 36,
 13,
 17,
 17,
 16,
 14,
 8,
 16,
 14,
 14,
 15,
 20,
 24,
 8,
 15,
 17,
 9,
 17,
 13,
 19,
 8,
 14,
 18,
 24,
 16,
 11,
 13,
 10,
 16,
 11,
 22,
 15,
 18,
 11,
 14,
 34,
 10,
 13,
 21,
 35,
 15,
 6,
 23,
 22,
 19,
 20,
 13,
 21,
 10,
 11,
 11,
 16,
 12,
 15,
 8,
 1,
 24,
 19,
 22,
 13,
 12,
 16,
 2,
 16,
 20,
 12,
 11,
 22,
 20,
 16,
 17,
 97,
 1,
 23,
 100,
 18,
 13,
 21,
 24,
 16,
 18,
 22,
 18,
 11,
 24,
 22,
 20,
 22,
 26,
 1,
 1,
 5,
 31]