
### Data Formatting

In [None]:
import numpy as np
import pandas as pd

#
# Change these to switch around parameters
#

set_size = 4  # Number of consecutive days per output row (window length)
data_column = 1  # Index of column to use (S&P - 1)


def build_windows(series, window):
    """Return every run of ``window`` consecutive values of ``series``.

    Row ``i`` of the result is ``series[i:i + window]``, so neighbouring
    rows overlap by ``window - 1`` values.

    Args:
        series: One-dimensional sequence of values.
        window: Number of consecutive values per row; must be at least 1.

    Returns:
        A 2-D array of shape ``(len(series) - window + 1, window)``. When
        ``series`` is shorter than ``window`` the array has zero rows.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    series = np.asarray(series)
    n_rows = max(len(series) - window + 1, 0)

    # Broadcasting a column of start positions against a row of in-window
    # offsets gives the source index of every cell of the result at once,
    # which replaces the nested Python loops with one indexing operation.
    starts = np.arange(n_rows)[:, np.newaxis]
    offsets = np.arange(window)[np.newaxis, :]
    return series[starts + offsets]


# The guard is true both in a notebook kernel and when run as a script, so
# the cell behaves as before; it only keeps the file I/O from running when
# build_windows is imported from elsewhere.
if __name__ == "__main__":
    # Load data from current directory
    data = pd.read_csv('./data_stocks.csv')
    data = data.values

    # Pull specified column of data (kept 2-D, one column wide)
    snp = data[:, data_column:data_column + 1]

    # One row per start day, each holding set_size consecutive values
    data_set = build_windows(snp[:, 0], set_size)

    fmt = {'float_kind': '{:0.2f}'.format}
    np.set_printoptions(suppress=True, formatter=fmt)

    # Prints top 10 records for quick confirmation
    print(data_set[0:10])

    # No header row is written: the file contains data rows only.
    np.savetxt("output.csv", data_set, delimiter=",", fmt="%10.2f")
    print("\n\nSaved data to 'output.csv'")


### Network

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt


# output.csv is written by np.savetxt without a header row, so header=None
# is needed to keep the first window from being consumed as column names.
data = pd.read_csv('./output.csv', header=None)

data = data.values
np.random.shuffle(data)  # shuffles the rows in place

# Scale Data to [-1,1] (each column independently)
# NOTE(review): the scaler is fitted on all rows before the train/test
# split, so test-set min/max values influence the scaling — confirm this
# is acceptable.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled_data = scaler.fit_transform(data)

# Split data into inputs and outputs: the first two values of each row are
# the features and the value in column 3 is the target.
# NOTE(review): column 2 is not used as a feature — confirm this is intended.
inputs = scaled_data[:, :2]
outputs = scaled_data[:, 3]

# Split Data into 80% train, 20% test
cutoff = int(0.8 * len(inputs))

train_inputs = inputs[:cutoff]
train_outputs = outputs[:cutoff]

# The test slice starts exactly at cutoff so that no row is dropped
# between the two sets.
test_inputs = inputs[cutoff:]
test_outputs = outputs[cutoff:]

# Setup placeholders for input and output
X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None])

# Build basic network with sigmoid activation
# and using the pre-built dense layer
# NOTE(review): a sigmoid output lies in (0, 1) while the targets are
# scaled to [-1, 1], so negative targets cannot be matched — consider a
# linear/tanh output layer or a (0, 1) scaling range.
network = tf.layers.dense(X, units=1, activation=tf.nn.sigmoid)
network = tf.layers.dense(network, units=1, activation=tf.nn.sigmoid)

# The dense layer emits shape [batch, 1] while Y is [batch]. Subtracting
# them directly broadcasts to [batch, batch] and averages every prediction
# against every target, so the trailing axis is dropped first.
prediction = tf.squeeze(network, axis=1)
cost = tf.reduce_mean(tf.square(prediction - Y))

optimizer = tf.train.RMSPropOptimizer(0.01).minimize(cost)
init = tf.global_variables_initializer()

batch_size = 256

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(100):
        # Reshuffle the training rows each epoch, keeping inputs and
        # outputs aligned by applying the same permutation to both.
        shuffled_indices = np.random.permutation(len(train_inputs))
        train_inputs = train_inputs[shuffled_indices]
        train_outputs = train_outputs[shuffled_indices]

        # Step through the data one batch at a time; the last batch may be
        # smaller than batch_size. Iterating over batch start offsets
        # avoids feeding empty slices once the data is exhausted.
        for start in range(0, len(train_inputs), batch_size):
            batch_x = train_inputs[start:start + batch_size]
            batch_y = train_outputs[start:start + batch_size]

            sess.run(optimizer, feed_dict={X: batch_x, Y: batch_y})

        # Report train cost, then test cost, after every epoch
        print(sess.run(cost, feed_dict={X: train_inputs, Y: train_outputs}))
        print(sess.run(cost, feed_dict={X: test_inputs, Y: test_outputs}))