In [24]:
import tensorflow as tf
import FinanceDataReader as fdr
import csv
import numpy as np
from tensorflow.contrib import rnn
import matplotlib
import os
import matplotlib.pyplot as plt
fdr.__version__

'0.7.1'

In [28]:
def write_file(data_list, file_name):
    """Write ``data_list`` to ``file_name`` as CSV.

    The target file is opened for writing (truncating any existing content)
    before the input is converted with ``np.array`` and passed to
    ``csv.writer.writerows``.

    Args:
        data_list: Row-oriented data accepted by ``np.array``; each entry
            along the first axis becomes one CSV row.
        file_name: Path of the CSV file to create or overwrite.

    Returns:
        None.
    """
    # newline='' lets the csv module control line endings itself.
    with open(file_name, 'w', newline='') as csv_file:
        rows = np.array(data_list)
        csv.writer(csv_file).writerows(rows)

def MinMaxScaler(data):
    """Min-max scale ``data`` along axis 0.

    Each value has the axis-0 minimum subtracted and is divided by the
    axis-0 range (max - min) plus a small constant.

    Args:
        data: Array-like input; minimum and maximum are taken along axis 0.

    Returns:
        The scaled array. Because of the added constant, the largest value
        maps to slightly less than 1 and a constant column maps to 0.
    """
    lowest = np.min(data, 0)
    span = np.max(data, 0) - lowest
    # The 1e-7 term keeps the division defined when the range is zero.
    return (data - lowest) / (span + 1e-7)

def extract_x_and_y(data, sequence_length, plus_days_to_predict=0):
    """Slice ``data`` into sliding input windows and matching targets.

    For every start index ``i`` in ``range(len(data) - sequence_length)`` the
    input is ``data[i:i + sequence_length]`` and the target is column 3 of the
    row at ``i + sequence_length + plus_days_to_predict``.

    Args:
        data: 2-D array indexed as ``data[row, [3]]``.
        sequence_length: Number of consecutive rows in each input window.
        plus_days_to_predict: Extra row offset between the end of the window
            and the target row (0 means the row right after the window).

    Returns:
        Tuple ``(x, y)`` of NumPy arrays built from the collected windows and
        targets. When the target row would fall past the end of ``data`` the
        target is the placeholder ``[0]``, so the number of targets always
        equals the number of windows.
    """
    total_rows = len(data)
    windows, targets = [], []
    for start in range(total_rows - sequence_length):
        stop = start + sequence_length
        target_row = stop + plus_days_to_predict
        windows.append(data[start:stop])
        # Out-of-range targets are filled with a zero placeholder.
        targets.append(data[target_row, [3]] if target_row < total_rows else [0])
    return np.array(windows), np.array(targets)

def lstm_cell(hidden_size):
    """Create one ``rnn.BasicLSTMCell`` with ``hidden_size`` units.

    Args:
        hidden_size: Number of units, passed as the first argument to
            ``rnn.BasicLSTMCell``.

    Returns:
        The new cell, constructed with ``state_is_tuple=True``.
    """
    return rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)

def getDailyStockPriceData(code, startDate, endDate, sequence_length, plus_days_to_predict = 0):
    """Fetch price data for ``code`` and turn it into windows and targets.

    The result of ``fdr.DataReader(code, startDate, endDate)`` is converted to
    a NumPy array, its first and last columns are dropped, the remainder is
    scaled with ``MinMaxScaler`` and then windowed by ``extract_x_and_y``.

    Args:
        code: Ticker code forwarded to ``fdr.DataReader``.
        startDate: Start of the date range forwarded to ``fdr.DataReader``.
        endDate: End of the date range forwarded to ``fdr.DataReader``.
        sequence_length: Window length forwarded to ``extract_x_and_y``.
        plus_days_to_predict: Target offset forwarded to ``extract_x_and_y``.

    Returns:
        The ``(x, y)`` tuple returned by ``extract_x_and_y``.
    """
    raw_prices = np.array(fdr.DataReader(code, startDate, endDate))
    # Original author's note: the kept columns are open, high, low, close,
    # volume -- TODO confirm against the installed FinanceDataReader version.
    scaled_prices = MinMaxScaler(raw_prices[:, 1:-1])
    return extract_x_and_y(scaled_prices, sequence_length, plus_days_to_predict)

In [29]:
# Ticker code. Original example note: 000150 is Doosan (KR), Yihua Healthcare (CN).
code = '005930'

# Training series: fetch, drop the first and last columns, then min-max scale.
# Original author's note: kept columns are open, high, low, close, volume
# -- TODO confirm against the installed FinanceDataReader version.
train_frame = fdr.DataReader(code, '1992-01-01', '2017-12-31')
data = MinMaxScaler(np.array(train_frame)[:, 1:-1])
print("shape:", data.shape, "\n", "data[0]: ", data[0])

# Test series: a different ticker over 2018, scaled independently of the
# training series with its own min/max.
test_frame = fdr.DataReader('000150', '2018-01-01', '2018-12-29')
test_data = MinMaxScaler(np.array(test_frame)[:, 1:-1])

shape: (5756, 5) 
 data[0]:  [0.01482667041874382 0.014492498724845678 0.01499928744475025
 0.014842825084348164 0.00613960227656443]


In [30]:
# Model / windowing configuration.
data_dim, hidden_dim, num_classes = 5, 10, 1  # features per row, LSTM units, outputs
sequence_length = 5                           # rows per input window
batch_size = None                             # used as the leading placeholder dimension
learning_rate = 0.01

# Windows whose target is the row right after each window.
data_x, data_y = extract_x_and_y(data, sequence_length)
print(data_y.shape)
test_data_x, test_data_y = extract_x_and_y(test_data, sequence_length)

# Same training range, but with targets offset a further 30 rows past each window.
data_x_new, data_y_new = getDailyStockPriceData(code, '1992-01-01', '2017-12-31', sequence_length, 30)

(5751, 1)
(5751, 1)
(5751, 5, 5)


In [None]:
# The previous example converted x and y to one-hot encoding before use;
# how should they be represented in this example?
# X = [0.2322834645664719 0.23285198555914646 0.2284569138271975
# 0.2538759689917561 0.052436875076320326]   -> [1] next day's price goes up, [0] next day's price goes down; y probably needs to be converted to shape [?, 1]

In [None]:
# Graph inputs: X carries windows of sequence_length rows with data_dim
# features each; Y carries num_classes target values per window.
X = tf.placeholder(tf.float32, shape=[batch_size, sequence_length, data_dim])
Y = tf.placeholder(tf.float32, shape=[batch_size, num_classes])
print("shape of X: ", X)
print("shape of Y: ", Y)

In [None]:
# Two LSTM layers of hidden_dim units each, wrapped into one multi-layer cell.
# Single-layer alternative kept for reference:
# cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_dim, state_is_tuple=True, activation=tf.tanh)
stacked_layers = [lstm_cell(hidden_dim) for _ in range(2)]
cell = rnn.MultiRNNCell(stacked_layers, state_is_tuple=True)

In [None]:
# Unroll the recurrent cell over the input windows in X.
# outputs: unfolding size x hidden size, state = hidden size
outputs, _states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
print(outputs)

# FC layer: flatten so each row holds hidden_dim values, then project every
# row to num_classes values with a linear (no activation) layer.
X_for_fc = tf.reshape(outputs, [-1, hidden_dim])
outputs = tf.contrib.layers.fully_connected(X_for_fc, num_classes, activation_fn=None)

# Restore the [-1, sequence_length, num_classes] layout.
# (The original note mentions sequence_loss, but no sequence_loss is used
# in this cell; the loss below is a sum of squared errors.)
outputs = tf.reshape(outputs, [-1, sequence_length, num_classes])

# We use the last cell's output: the final time step is passed through a
# second linear layer to produce the prediction.
Y_pred = tf.contrib.layers.fully_connected(outputs[:, -1], num_classes, activation_fn=None)

# Sum (not mean) of squared errors, minimised with Adam.
loss = tf.reduce_sum(tf.square(Y_pred - Y))
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

In [None]:
# RMSE metric, fed through its own placeholders so it can score any pair of
# target / prediction arrays after they have been computed.
expected_results = tf.placeholder(tf.float32, shape=[batch_size, 1])
predictions = tf.placeholder(tf.float32, shape=[batch_size, 1])
squared_error = tf.square(expected_results - predictions)
rmse = tf.sqrt(tf.reduce_mean(squared_error))

In [None]:
# The session is left open on purpose: the evaluation cells below reuse it.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Every step trains on the full set of training windows.
train_fetches = [train_op, loss]
train_feed = {X: data_x, Y: data_y}
for step in range(20000):
    _, step_loss = sess.run(train_fetches, feed_dict=train_feed)
    # Report the loss every 1000 steps.
    if step % 1000 == 0:
        print("[step: {}] loss: {}".format(step, step_loss))

In [None]:
# Compare predictions against targets on the training windows.
predict = sess.run(Y_pred, feed_dict={X: data_x})
train_rmse_feed = {expected_results: data_y, predictions: predict}
rmse_val = sess.run(rmse, feed_dict=train_rmse_feed)
print("RMSE: {}".format(rmse_val))

# Overlay targets and predictions on one figure.
for series in (data_y, predict):
    plt.plot(series)
plt.xlabel("Time Period")
plt.ylabel("Stock Price")
plt.show()

In [None]:
# Score the trained model on the held-out test windows.
test_predict = sess.run(Y_pred, feed_dict={X: test_data_x})
test_rmse_feed = {expected_results: test_data_y, predictions: test_predict}
rmse_val = sess.run(rmse, feed_dict=test_rmse_feed)
print("RMSE: {}".format(rmse_val))

# Three figures: targets and predictions overlaid, then each on its own.
# A heading of None means nothing is printed before that figure.
for heading, curves in (
    (None, (test_data_y, test_predict)),
    ("expeced:", (test_data_y,)),
    ("predicted:", (test_predict,)),
):
    if heading is not None:
        print(heading)
    for curve in curves:
        plt.plot(curve)
    plt.xlabel("Time Period")
    plt.ylabel("Stock Price")
    plt.show()

In [None]:
# Second test set: ticker 005930 over 2018, scaled with its own min/max.
# Original author's note: kept columns are open, high, low, close, volume
# -- TODO confirm against the installed FinanceDataReader version.
test_frame_2 = fdr.DataReader('005930', '2018-01-01', '2018-12-29')
test_data_2 = MinMaxScaler(np.array(test_frame_2)[:, 1:-1])
test_data_x_2, test_data_y_2 = extract_x_and_y(test_data_2, sequence_length)

# Test step
test_predict_2 = sess.run(Y_pred, feed_dict={X: test_data_x_2})
test_rmse_feed_2 = {expected_results: test_data_y_2, predictions: test_predict_2}
rmse_val = sess.run(rmse, feed_dict=test_rmse_feed_2)
print("RMSE: {}".format(rmse_val))

# Three figures: targets and predictions overlaid, then each on its own.
# A heading of None means nothing is printed before that figure.
for heading, curves in (
    (None, (test_data_y_2, test_predict_2)),
    ("expected: ", (test_data_y_2,)),
    ("predicted: ", (test_predict_2,)),
):
    if heading is not None:
        print(heading)
    for curve in curves:
        plt.plot(curve)
    plt.xlabel("Time Period")
    plt.ylabel("Stock Price")
    plt.show()

In [None]:
# Third test set: ticker 005930 over 2018 with targets offset a further 30
# rows past each window. The window length must match the placeholder X, so
# sequence_length is used instead of a hard-coded 5.
test_data_x_3, test_data_y_3 = getDailyStockPriceData('005930', '2018-01-01', '2018-12-29', sequence_length, 30)
print(test_data_y_3.shape)

# Test step
test_predict_3 = sess.run(Y_pred, feed_dict={X: test_data_x_3})
# Score predictions against the actual targets. Feeding the predictions as
# both inputs (as before) always yields an RMSE of 0.
# NOTE: extract_x_and_y fills targets that fall past the end of the data with
# 0, so the trailing windows contribute placeholder targets to this score.
rmse_val = sess.run(rmse, feed_dict={expected_results: test_data_y_3, predictions: test_predict_3})
print("RMSE: {}".format(rmse_val))

# Three figures: targets and predictions overlaid, then each on its own.
for heading, curves in (
    ("both: ", (test_data_y_3, test_predict_3)),
    ("expected: ", (test_data_y_3,)),
    ("predicted: ", (test_predict_3,)),
):
    print(heading)
    for curve in curves:
        plt.plot(curve)
    plt.xlabel("Time Period")
    plt.ylabel("Stock Price")
    plt.show()
