In [None]:
import tensorflow as tf
import FinanceDataReader as fdr
import csv
import numpy as np
from tensorflow.contrib import rnn
import matplotlib
import os
import matplotlib.pyplot as plt
import datetime
import time

# Show the installed FinanceDataReader version (last expression of the cell).
fdr.__version__

In [None]:
def write_file(data_list, file_name):
    """Write ``data_list`` to ``file_name`` as CSV, one row per entry.

    The file is opened in write mode (any existing content is replaced) and
    the rows are converted to a NumPy array before being passed to
    ``csv.writer``. Returns ``None``.
    """
    with open(file_name, mode='w', newline='') as csv_file:
        rows = np.array(data_list)
        csv.writer(csv_file).writerows(rows)

def MinMaxScaler(data):
    """Min-max scale every column of ``data`` without modifying the input.

    Column 0 must contain date/datetime-like objects (anything exposing
    ``timetuple()``); each one is converted to a timestamp with
    ``time.mktime`` before scaling. All other columns must be numeric.

    Args:
        data: 2-D array-like of shape (rows, columns).

    Returns:
        A new ``float64`` ndarray of the same shape holding
        ``(value - column_min) / (column_max - column_min + 1e-7)``.

    Raises:
        AttributeError: if an entry of column 0 has no ``timetuple()``.
        ValueError: if ``data`` has no rows (min/max of an empty column).
    """
    # Work on a copy: callers typically pass a slice view, and rewriting
    # column 0 in place would both corrupt their array and make a second call
    # fail (the converted floats have no timetuple()).
    values = np.array(data, dtype=object)
    values[:, 0] = [time.mktime(stamp.timetuple()) for stamp in values[:, 0]]
    # Every entry is numeric now, so leave the object dtype behind.
    values = values.astype(np.float64)

    column_min = values.min(axis=0)
    column_max = values.max(axis=0)
    # The small constant in the denominator prevents division by zero for
    # constant columns.
    return (values - column_min) / (column_max - column_min + 1e-7)

def extract_x_and_y(data, sequence_length):
    """Cut ``data`` into sliding windows plus the row that follows each window.

    Every run of ``sequence_length`` consecutive rows becomes one input
    window. A window gets a target (the next row) and a target timestamp
    (column 0 of that row, kept as a length-1 array) only when a next row
    exists, so the final window has no target and the inputs contain one more
    entry than the targets.

    Returns:
        ``(windows, targets, target_timestamps, data)`` where the first three
        are NumPy arrays and ``data`` is the argument itself.
    """
    row_count = len(data)
    window_starts = range(0, row_count - sequence_length + 1)

    windows = [data[start:start + sequence_length] for start in window_starts]
    # Only windows that are followed by another row contribute a target.
    target_rows = [start + sequence_length
                   for start in window_starts
                   if start + sequence_length < row_count]
    targets = [data[row] for row in target_rows]
    target_stamps = [data[row, [0]] for row in target_rows]

    return np.array(windows), np.array(targets), np.array(target_stamps), data

def extract_x_and_y_for_training(data, sequence_length):
    """Build (window, next-row) training pairs from ``data``.

    Each window of ``sequence_length`` consecutive rows is paired with the row
    immediately after it, so inputs and targets always have the same length.
    Column 0 of every target row is collected separately (as a length-1
    array) as its timestamp. A diagnostic line is printed for the pair whose
    target is the last row of ``data``.

    Returns:
        ``(windows, targets, target_timestamps)`` as NumPy arrays.
    """
    windows, targets, target_stamps = [], [], []
    last_row = len(data) - 1

    # ``start`` is the first row of the window, ``target_row`` the row right
    # after the window.
    for start, target_row in enumerate(range(sequence_length, len(data))):
        target = data[target_row]
        target_stamps.append(data[target_row, [0]])
        if target_row >= last_row:
            print("i: ", start, " y: ", target, " len: ", len(data))
        windows.append(data[start:target_row])
        targets.append(target)

    return np.array(windows), np.array(targets), np.array(target_stamps)

def lstm_cell(hidden_size):
    """Return a new ``BasicLSTMCell`` with ``hidden_size`` units and tuple state."""
    return rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)

def getDailyStockPriceData(code, startDate, endDate, sequence_length, isTraining = False):
    """Download, scale and window the price data for one stock code.

    The table returned by ``fdr.DataReader(code, startDate, endDate)`` is
    converted to an array, its last column is dropped, the remainder is
    scaled with ``MinMaxScaler`` and then cut into windows.

    Returns:
        With ``isTraining == False``: the 4-tuple from ``extract_x_and_y``
        (windows, targets, target timestamps, scaled data).
        Otherwise: the 3-tuple from ``extract_x_and_y_for_training``
        (windows, targets, target timestamps).
    """
    raw_table = np.array(fdr.DataReader(code, startDate, endDate))
    # Everything except the last column is kept; the original note lists the
    # kept price fields as: open, high, low, close, volume.
    scaled_rows = MinMaxScaler(raw_table[:, :-1])
    window_builder = extract_x_and_y if isTraining == False else extract_x_and_y_for_training
    return window_builder(scaled_rows, sequence_length)

In [None]:
# --- Shapes shared by the placeholders and the network ---
sequence_length = 30   # rows per input window
data_dim = 6           # features per row of an input window
num_classes = 6        # values predicted per sample
hidden_dim = 10        # units in each LSTM layer
batch_size = None      # batch dimension left unspecified in the placeholders

# --- Optimisation ---
learning_rate = 0.01   # step size given to the Adam optimizer

In [None]:
# The previous example converted x and y to one-hot encodings.
# How should they be represented in this example?
# X = [0.2322834645664719 0.23285198555914646 0.2284569138271975
# 0.2538759689917561 0.052436875076320326]   -> [1] next-day price rises, [0] next-day price falls; y probably has to be converted to shape [?, 1]

In [None]:
# Graph inputs.
# X: input windows, shape [batch_size, sequence_length, data_dim].
# Y: the target row for each window, shape [batch_size, num_classes].
X = tf.placeholder(tf.float32, [batch_size, sequence_length, data_dim])
Y = tf.placeholder(tf.float32, [batch_size, num_classes])

In [None]:
# Stack two LSTM cells (each created by lstm_cell with hidden_dim units) into
# one multi-layer recurrent cell.
cell = rnn.MultiRNNCell([lstm_cell(hidden_dim) for _ in range(2)], state_is_tuple=True)
# Single-layer alternative, currently disabled:
# cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_dim, state_is_tuple=True, activation=tf.tanh)

In [None]:
# Unroll the stacked LSTM over X. The reshape below treats `outputs` as having
# hidden_dim values per time step.
outputs, _states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
print(outputs)
# First fully connected layer (no activation): flatten all time steps into rows
# of hidden_dim values and project each row to num_classes values.
X_for_fc = tf.reshape(outputs, [-1, hidden_dim])
outputs = tf.contrib.layers.fully_connected(X_for_fc, num_classes, activation_fn=None)

# Restore the [batch, sequence_length, num_classes] layout so the last time
# step can be selected below.
outputs = tf.reshape(outputs, [-1, sequence_length, num_classes])

# Second fully connected layer (no activation), applied only to the last time
# step; its output is the prediction.
Y_pred = tf.contrib.layers.fully_connected(outputs[:, -1], num_classes, activation_fn=None)
print(Y_pred)

# Disabled: select a single column (index 4) of the prediction.
# Y_pred_end_price = Y_pred[:, [4]]

# Sum of squared errors over every predicted value, minimised with Adam.
loss = tf.reduce_sum(tf.square(Y_pred - Y))
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

In [None]:
# RMSE
# The metric has its own placeholders, so it is evaluated on arrays that were
# already fetched from the session rather than on Y_pred directly.
expected_results = tf.placeholder(tf.float32, [batch_size, num_classes])
predictions = tf.placeholder(tf.float32, [batch_size, num_classes])
# Root of the mean squared difference over all fed values.
rmse = tf.sqrt(tf.reduce_mean(tf.square(expected_results - predictions)))

In [None]:
# Date range requested for every training download.
start_date_train = '1992-01-01'
end_date_train = '2018-11-30'

#Training Set #1
code = '005930' # Samsung Electronics
# isTraining=True -> (windows, targets, target timestamps)
data_x, data_y, time_stamp_y = getDailyStockPriceData(code, start_date_train, end_date_train, sequence_length, True)
print(data_x.shape)
print(data_y.shape)

#Training Set #2

# NOTE(review): this second set is downloaded but is not part of training_set
# below (only the commented-out alternative uses it) — confirm it is still needed.
code_2 = '041510'
data_x_2, data_y_2, time_stamp_y_2 = getDailyStockPriceData(code_2, start_date_train, end_date_train, sequence_length, True)

# Each entry of training_set is an [inputs, targets] pair.
# training_set = [[data_x_2, data_y_2]]
training_set = [[data_x, data_y]]

In [None]:
# Open a session and initialise every variable of the graph.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Restoring a saved checkpoint instead of training from scratch (disabled):
# saver = tf.train.Saver()
# saver.restore(sess,'./daily_stock_prediction_model.ckpt')

# Full-batch training: every step feeds the complete set.
for t, pair in enumerate(training_set):
    x, y = pair[0], pair[1]
    print("training set #", t)
    print(x.shape)
    print(y.shape)
    feed = {X: x, Y: y}
    for i in range(10):
        _, step_loss = sess.run([train_op, loss], feed_dict=feed)
        # Logging interval of 1: the loss is reported after every step.
        if (i + 1) % 1 == 0:
            print("[#{} step: {}] loss: {}".format(t, i, step_loss))

In [None]:
# Compare predictions on the training inputs with the training targets.
predict = sess.run(Y_pred, feed_dict={X: data_x})
print(predict.shape)
rmse_val = sess.run(rmse, feed_dict={expected_results: data_y, predictions: predict})
print("RMSE: {}  date range: {}~{}".format(rmse_val, start_date_train, end_date_train))

# One entry per figure: (heading printed before the figure, or None,
#                        argument tuples for the plt.plot calls on that figure).
train_figures = [
    (None, [(time_stamp_y, data_y), (time_stamp_y, predict)]),
    (None, [(data_y,), (predict,)]),
    ("expected", [(data_y,)]),
    ("predicted", [(predict,)]),
]
for heading, curves in train_figures:
    if heading is not None:
        print(heading)
    for curve_args in curves:
        plt.plot(*curve_args)
    plt.xlabel("Time Period")
    plt.ylabel("Stock Price")
    plt.show()

In [None]:
# Test step: evaluate the trained model on code 000150.
test_data_x, test_data_y, test_data_time_stamp_y = getDailyStockPriceData('000150', '2018-01-01', '2018-12-29', sequence_length, True)

test_predict = sess.run(Y_pred, feed_dict={X: test_data_x})
rmse_val = sess.run(rmse, feed_dict={expected_results: test_data_y, predictions: test_predict})
print("RMSE: {}".format(rmse_val))

# One entry per figure: (heading printed before the figure, or None,
#                        argument tuples for the plt.plot calls on that figure).
test_figures = [
    (None, [(test_data_time_stamp_y, test_data_y), (test_data_time_stamp_y, test_predict)]),
    (None, [(test_data_y,), (test_predict,)]),
    ("expeced:", [(test_data_y,)]),
    ("predicted:", [(test_predict,)]),
]
for heading, curves in test_figures:
    if heading is not None:
        print(heading)
    for curve_args in curves:
        plt.plot(*curve_args)
    plt.xlabel("Time Period")
    plt.ylabel("Stock Price")
    plt.show()

In [None]:
# Test step: evaluate the trained model on code 035900.
print("JYP Ent. 2018-01-01 ~ 2018-12-29")
test_data_x_2, test_data_y_2, test_data_time_stamp_y_2 = getDailyStockPriceData('035900', '2018-01-01', '2018-12-29', sequence_length, True)
test_predict_2 = sess.run(Y_pred, feed_dict={X: test_data_x_2})
rmse_val = sess.run(rmse, feed_dict={expected_results: test_data_y_2, predictions: test_predict_2})
print("RMSE: {}".format(rmse_val))

# One entry per figure: (heading printed before the figure, or None,
#                        argument tuples for the plt.plot calls on that figure).
test_figures_2 = [
    (None, [(test_data_time_stamp_y_2, test_data_y_2), (test_data_time_stamp_y_2, test_predict_2)]),
    ("expected: ", [(test_data_y_2,)]),
    ("predicted: ", [(test_predict_2,)]),
]
for heading, curves in test_figures_2:
    if heading is not None:
        print(heading)
    for curve_args in curves:
        plt.plot(*curve_args)
    plt.xlabel("Time Period")
    plt.ylabel("Stock Price")
    plt.show()

In [None]:
# Test step: evaluate the trained model on code 005930.
# The heading is built from the dates actually passed to the loader, so the
# printed range always matches the plotted data.
# NOTE(review): the heading used to read "2018-01-01 ~ 2018-12-29" while the
# data was loaded from 1992-01-01. If only 2018 was intended, change
# test_start_date_3 instead.
test_start_date_3 = '1992-01-01'
test_end_date_3 = '2018-12-29'
print("삼성전자: {} ~ {}".format(test_start_date_3, test_end_date_3))
test_data_x_3, test_data_y_3, test_data_time_stamp_y_3 = getDailyStockPriceData('005930', test_start_date_3, test_end_date_3, sequence_length, True)
test_predict_3 = sess.run(Y_pred, feed_dict={X: test_data_x_3})
rmse_val = sess.run(rmse, feed_dict={expected_results: test_data_y_3, predictions: test_predict_3})
print("RMSE: {}".format(rmse_val))

# One entry per figure: (heading printed before the figure,
#                        argument tuples for the plt.plot calls on that figure).
test_figures_3 = [
    ("both: ", [(test_data_time_stamp_y_3, test_data_y_3), (test_data_time_stamp_y_3, test_predict_3)]),
    ("both: ", [(test_data_y_3,), (test_predict_3,)]),
    ("expected: ", [(test_data_y_3,)]),
    ("predicted: ", [(test_predict_3,)]),
]
for heading, curves in test_figures_3:
    print(heading)
    for curve_args in curves:
        plt.plot(*curve_args)
    plt.xlabel("Time Period")
    plt.ylabel("Stock Price")
    plt.show()

In [None]:
def get_predict_data_of_last_date(x):
    """Predict the row that follows the last window in ``x``.

    Args:
        x: sequence of input windows accepted by the ``X`` placeholder.

    Returns:
        The model output for the final window only, with the leading
        (batch) dimension kept.
    """
    # Only the final window's prediction is returned, so feed just that window
    # instead of evaluating the model on every window and discarding the rest.
    return sess.run(Y_pred, feed_dict={X: x[-1:]})

def retrain_with_outputs(data, x_data, sequence_length, addtional_batch_count):
    """Extend ``data`` with the model's own predictions and re-window it.

    Despite the name, no training happens here. For each of the
    ``addtional_batch_count`` steps the model predicts the row after the last
    window of ``x_data``, that row is appended to ``data``, and the windows
    and targets are rebuilt with ``extract_x_and_y``.

    Args:
        data: 2-D array of scaled rows.
        x_data: windows built from ``data``.
        sequence_length: rows per window.
        addtional_batch_count: number of predicted rows to append.

    Returns:
        ``(x_data, y_data)`` — the windows and targets after the final step.
        When ``addtional_batch_count`` is 0, ``x_data`` is returned unchanged
        together with the targets extracted from ``data``.
    """
    print("data: {} x_data {}".format(data.shape, x_data.shape))
    # Compute the targets up front so that a step count of 0 returns a valid
    # result instead of failing on an unbound ``y_data``.
    _, y_data, _, _ = extract_x_and_y(data, sequence_length)
    for _step in range(addtional_batch_count):
        predict_of_last = get_predict_data_of_last_date(x_data)
        # np.append returns a new array, so the caller's ``data`` is untouched.
        data = np.append(data, predict_of_last, axis=0)
        x_data, y_data, _, _ = extract_x_and_y(data, sequence_length)
        print("x : {} data: {}".format(x_data.shape, data.shape))
    print("result: {} {}".format(x_data.shape, y_data.shape))
    return x_data, y_data

In [None]:
# Test step: roll the forecast forward using the model's own predictions.
# With isTraining=False the loader also returns the scaled rows (`data`) and
# keeps the final window, which has no target yet.
test_data_x_4, test_data_y_4, test_data_time_stamp_y_4, data = getDailyStockPriceData('005930', '2018-11-15', '2018-12-31', sequence_length, False)
# Append three predicted rows, one at a time.
x_data_retrained, y_data_retrained = retrain_with_outputs(data, test_data_x_4, sequence_length, 3)

# rmse_val = sess.run(rmse, feed_dict={expected_results: test_data_y_4, predictions: test_predict_4})
# print("RMSE: {}".format(rmse_val))
# The 31st X row (shape [1, 6]) has to be appended and the prediction restarted.


# The stray line-continuation backslash that used to end the next line glued
# it to the plt.plot call and made the whole cell a SyntaxError.
# NOTE(review): [:, -1:] selects the last column of each row — confirm this is
# the closing-price column that the variable name implies.
predict_end_prices = y_data_retrained[:,-1:]
plt.plot(predict_end_prices)
plt.xlabel("Time Period")
plt.ylabel("Stock Price")
plt.show()





In [None]:
# Create a Saver for the graph's variables; the save call itself is disabled.
saver = tf.train.Saver()
# saver .save(sess,'./daily_stock_prediction_model.ckpt')