In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
from data_functions import *

In [None]:
def read_stock_csv(filename):
    """Load a stock CSV and return its header, first column and z-scored features.

    The first CSV column is returned as strings (the caller treats it as the
    date); every remaining column is parsed as float32 and standardised
    column-wise to zero mean and unit standard deviation. The mean and standard
    deviation are computed over every row of the file.

    Args:
        filename: Path of the CSV file. Its first row is used as the header.

    Returns:
        key: 1-D string array of all column names (first column included).
        date: 1-D string array holding the first column of every row.
        feature: float32 array of shape (rows, columns - 1) with the
            standardised values of the remaining columns.
    """
    data = pd.read_csv(filename, header=0)
    # `str` is accepted as a dtype on both Python 2 and Python 3 NumPy builds.
    key = np.array(list(data.keys()), dtype=str)
    # DataFrame.as_matrix() was removed in pandas 1.0; `.values` returns the
    # same array (dtype 'object' for mixed columns, so it is converted below).
    value = data.values
    date = np.array(value[:, 0], dtype=str)
    feature = np.array(value[:, 1:], dtype='float32')

    # Data normalization for feature scaling.
    mean = np.mean(feature, axis=0)
    std = np.std(feature, axis=0)
    # A constant column has std == 0; dividing by it would turn the whole
    # column into NaN. Dividing by 1 instead leaves it at exactly 0.
    std[std == 0] = 1.0
    feature = (feature - mean) / std
    return key, date, feature
    

In [None]:
# Hyperparameters for the model, grouped by the part of the notebook that uses them.

# -- Data windowing / split --
SEQ_SIZE = 10          # rows per input window
PRED_STEP = 1          # how many rows after the window the target is taken from
TRAIN_RATIO = 0.8      # fraction of the windows used for training
TEST_RATIO = 0.2       # fraction held out; must sum to 1 with TRAIN_RATIO

# -- Network shape --
INPUT_DIM = 6          # last dimension of the input placeholder
HIDDEN_DIM = 6         # number of LSTM units
OUTPUT_DIM = 1         # width of the regression output

# -- Optimisation --
LEARNING_RATE = 0.0005  # passed to the Adam optimizer
BATCH_SIZE = 100        # samples per training step
N_EPOCHS = 1000         # passes over the training set

In [None]:
# Load the normalised feature matrix and pick the regression target column.
key, date, feature = read_stock_csv('S&P500.csv')

# The model input uses every feature column, so the width must match the
# input placeholder; fail here rather than at the first sess.run().
assert feature.shape[1] == INPUT_DIM, \
    'CSV has %d feature columns but INPUT_DIM is %d' % (feature.shape[1], INPUT_DIM)

# Column 3 of the feature matrix (the date column is already stripped).
# Labelled "Close Stock Price" by the author -- depends on the CSV column order.
_y_data = feature[:, 3]
_x_data = feature[:]

# Show a short preview instead of dumping the full arrays. Single-argument
# print(...) behaves the same on Python 2 and Python 3.
print('x head:\n%s' % (_x_data[:5],))
print('y head: %s' % (_y_data[:5],))
print('shapes: %s %s' % (np.shape(_x_data), np.shape(_y_data)))

In [None]:
# Data preparation: cut the series into sliding windows and split train/test.

# Compare with a tolerance: exact float equality rejects valid splits such as
# 0.7 / 0.3, whose sum is 0.9999999999999999 in binary floating point.
assert abs(TRAIN_RATIO + TEST_RATIO - 1.0) < 1e-9, \
    'TRAIN_RATIO and TEST_RATIO must sum to 1'
assert len(_x_data) == len(_y_data)

x_data = []
y_data = []

# Shape of one sample --> X: (SEQ_SIZE, INPUT_DIM), Y: scalar target.
# Window i covers rows [i, i + SEQ_SIZE); its target is the row PRED_STEP
# steps after the window start + SEQ_SIZE - 1 (i.e. the next row when PRED_STEP == 1).
for i in range(len(_x_data) - (SEQ_SIZE + PRED_STEP) + 1):
    _x = _x_data[i:i + SEQ_SIZE, :]
    _y = _y_data[i + SEQ_SIZE + (PRED_STEP - 1)]
    x_data.append(_x)
    y_data.append(_y)

data_len = len(x_data)
train_len = int(data_len * TRAIN_RATIO)
test_len = data_len - train_len
print('%d %d' % (train_len, test_len))

# Chronological split. Slice the tail with [train_len:] rather than
# [-test_len:]: when test_len == 0 the latter is [-0:], i.e. the whole list.
x_train, y_train = np.array(x_data[:train_len], dtype=float), np.array(y_data[:train_len], dtype=float)
x_test, y_test = np.array(x_data[train_len:], dtype=float), np.array(y_data[train_len:], dtype=float)

In [None]:
# Graph inputs. The leading None leaves the batch size free, so the same
# placeholders serve mini-batches and the full test set.
with tf.name_scope('placeholders'):
    x_input = tf.placeholder(dtype=tf.float32, shape=(None, SEQ_SIZE, INPUT_DIM))
    y_input = tf.placeholder(dtype=tf.float32, shape=(None, OUTPUT_DIM))

In [None]:
# Non-trainable step counter; the optimizer increments it once per training step.
global_step = tf.Variable(initial_value=0,
                          trainable=False,
                          name='global_step',
                          dtype=tf.int32)

In [None]:
# Construct the neural network model: one LSTM layer followed by a linear
# (fully-connected) layer used for regression.
with tf.variable_scope("RNN1") as scope:
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=HIDDEN_DIM,
                                        state_is_tuple=True)
    rnn_out, rnn_state = tf.nn.dynamic_rnn(cell, x_input, dtype=tf.float32)
    # Single-argument print(...) works on both Python 2 and Python 3.
    print(rnn_state)
    # shape of rnn_out = (batch, SEQ_SIZE, HIDDEN_DIM)

with tf.variable_scope("FC1") as scope:
    w_fc1 = tf.get_variable('w_fc1', [HIDDEN_DIM, OUTPUT_DIM], dtype=tf.float32)
    b_fc1 = tf.get_variable('b_fc1', [OUTPUT_DIM])
    print('%s %s' % (w_fc1, b_fc1))

    # Regress from the LSTM output at the last time step only.
    # The targets fed to y_input come from the normalised feature matrix, so
    # y_pred is in normalised units, not in raw price units.
    y_pred = tf.matmul(rnn_out[:, -1, :], w_fc1) + b_fc1

In [None]:
# Objective: mean squared error between prediction and target.
squared_error = tf.square(y_pred - y_input)
loss = tf.reduce_mean(squared_error)

# Adam update; each run of `train` also increments global_step.
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
train = optimizer.minimize(loss, global_step=global_step)

# Build the initializer after minimize() so it also covers the variables the
# optimizer created, then open the session and initialise everything.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [None]:
# Mini-batch training.
# Every sample is used each epoch, including a final batch shorter than
# BATCH_SIZE (the placeholders leave the batch dimension open). Slicing past
# the end of an array just returns a shorter slice, so no batch has to be
# skipped to avoid a range error.
for epoch in range(N_EPOCHS):
    # shuffle_data comes from data_functions; presumably it returns the same
    # samples in a new random order, with x and y kept aligned -- TODO confirm.
    temp_x, temp_y = shuffle_data(x_train, y_train)
    epoch_losses = []
    for index in range(0, len(temp_x), BATCH_SIZE):
        x_batch = temp_x[index:index + BATCH_SIZE]
        y_batch = temp_y[index:index + BATCH_SIZE]
        # y_input expects a column vector: (batch, 1).
        y_batch = np.reshape(y_batch, [len(y_batch), 1])
        loss_batch, _ = sess.run([loss, train],
                                 feed_dict={x_input: x_batch, y_input: y_batch})
        epoch_losses.append(loss_batch)
    # One line per epoch instead of one per batch keeps the cell output readable.
    if epoch_losses:
        print('epoch %d/%d - mean batch loss: %.6f'
              % (epoch + 1, N_EPOCHS, np.mean(epoch_losses)))

In [None]:
# Test run and visualization.
# Evaluate the whole held-out set in a single forward pass (no training op).
feed_dict = {x_input: x_test, y_input: np.reshape(y_test, [len(y_test), 1])}
test_loss, test_pred = sess.run([loss, y_pred], feed_dict=feed_dict)
print('test loss (MSE): %s' % test_loss)

import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot(test_pred, 'b', label='Predict')
ax.plot(y_test, 'r', label="True")
ax.set_title('S&P Close Price')
ax.set_xlabel('Days')
# Both series are standardised (zero mean, unit std) values, not dollars.
ax.set_ylabel('Normalized price (z-score)')
ax.legend()
plt.show()

