In [229]:
%matplotlib inline
import pandas as pd
import numpy as np

import fxtool as ft


In [230]:
def get_candle_list(data, rate_types=["Open", "Close"]):
    
    change_list = []
    for o,c in zip(data[rate_types[0]],data[rate_types[1]]):
        change =  np.log(c/o)
        change_list.append(change)
    return change_list


In [256]:
months = pd.date_range('2015-1', periods=6, freq='M')
months = months.tolist()
months_list = [m.strftime('%Y%m' ) for m in months]
months_list

['201501', '201502', '201503', '201504', '201505', '201506']

In [257]:
data = ft.read_csv(months_list)

201501
201502
201503
201504
201505
201506


In [258]:
ohlc = ft.get_ohlc(data, '1H')

ohlc["Change(Close)"] =  ohlc["Close"].diff()
ohlc["Change(Open)"] = ohlc["Open"].diff()
ohlc["Change(Open-Close)"] = get_candle_list(ohlc)

ohlc.describe()



Unnamed: 0,Open,High,Low,Close,Change(Close),Change(Open),Change(Open-Close)
count,3056.0,3056.0,3056.0,3056.0,3055.0,3055.0,3056.0
mean,120.24901,120.337699,120.159607,120.250992,0.000841,0.00091,1.6e-05
std,2.015482,2.009448,2.020748,2.015655,0.132075,0.131951,0.001078
min,115.98,116.238,115.852,115.98,-1.544,-1.543,-0.012467
25%,118.9535,119.03625,118.869,118.957,,,-0.000463
50%,119.744,119.8175,119.6715,119.7445,,,2.9e-05
75%,121.283,121.35925,121.2195,121.2845,,,0.000518
max,125.681,125.852,125.612,125.68,1.331,1.329,0.011287


In [271]:
ohlc['change_positive'] = 0
ohlc.ix[ohlc['Change(Open-Close)'] >= 0, 'change_positive'] = 1
ohlc['change_negative'] = 0
ohlc.ix[ohlc['Change(Open-Close)'] < 0, 'change_negative'] = 1

num_predictors = 50

data_columns = [
    'change_positive',  
    'change_negative'
]
for i in range(1,num_predictors+1):
    data_columns.append("change_"+str(i))

training_test_data = pd.DataFrame(columns=data_columns )

for i in range(4+i, len(ohlc)):
    data_dic = {}
    data_dic["change_positive"] = ohlc['change_positive'].ix[i]
    data_dic["change_negative"] = ohlc['change_negative'].ix[i]
    for j in range(1, num_predictors+1):
        data_dic["change_"+str(j)] = ohlc['Change(Open-Close)'].ix[i-j]
    
    training_test_data = training_test_data.append(
        data_dic,
        ignore_index=True
    )
    if i % 1000 == 0:
        print("{}".format(i))

training_test_data.head()

1000
2000
3000


Unnamed: 0,change_positive,change_negative,change_1,change_2,change_3,change_4,change_5,change_6,change_7,change_8,...,change_41,change_42,change_43,change_44,change_45,change_46,change_47,change_48,change_49,change_50
0,0.0,1.0,-0.000318,-0.000938,0.002347,-0.002138,0.001089,-0.002871,-0.000184,0.00102,...,0.000365,0.000216,0.000523,-0.000781,0.000307,-0.000166,-0.000133,0.000964,-0.000316,0.00069
1,0.0,1.0,-0.000428,-0.000318,-0.000938,0.002347,-0.002138,0.001089,-0.002871,-0.000184,...,0.001211,0.000365,0.000216,0.000523,-0.000781,0.000307,-0.000166,-0.000133,0.000964,-0.000316
2,0.0,1.0,-0.002368,-0.000428,-0.000318,-0.000938,0.002347,-0.002138,0.001089,-0.002871,...,-0.000191,0.001211,0.000365,0.000216,0.000523,-0.000781,0.000307,-0.000166,-0.000133,0.000964
3,1.0,0.0,-0.001413,-0.002368,-0.000428,-0.000318,-0.000938,0.002347,-0.002138,0.001089,...,0.000589,-0.000191,0.001211,0.000365,0.000216,0.000523,-0.000781,0.000307,-0.000166,-0.000133
4,0.0,1.0,0.003101,-0.001413,-0.002368,-0.000428,-0.000318,-0.000938,0.002347,-0.002138,...,-0.00401,0.000589,-0.000191,0.001211,0.000365,0.000216,0.000523,-0.000781,0.000307,-0.000166


In [272]:
predictors_tf = training_test_data[training_test_data.columns[2:]]

classes_tf = training_test_data[training_test_data.columns[:2]]

training_set_size = int(len(training_test_data) * 0.8)
test_set_size = len(training_test_data) - training_set_size

training_predictors_tf = predictors_tf[:training_set_size]
training_classes_tf = classes_tf[:training_set_size]

test_predictors_tf = predictors_tf[training_set_size:]
test_classes_tf = classes_tf[training_set_size:]

training_predictors_tf.head()

Unnamed: 0,change_1,change_2,change_3,change_4,change_5,change_6,change_7,change_8,change_9,change_10,...,change_41,change_42,change_43,change_44,change_45,change_46,change_47,change_48,change_49,change_50
0,-0.000318,-0.000938,0.002347,-0.002138,0.001089,-0.002871,-0.000184,0.00102,-0.001338,0.000669,...,0.000365,0.000216,0.000523,-0.000781,0.000307,-0.000166,-0.000133,0.000964,-0.000316,0.00069
1,-0.000428,-0.000318,-0.000938,0.002347,-0.002138,0.001089,-0.002871,-0.000184,0.00102,-0.001338,...,0.001211,0.000365,0.000216,0.000523,-0.000781,0.000307,-0.000166,-0.000133,0.000964,-0.000316
2,-0.002368,-0.000428,-0.000318,-0.000938,0.002347,-0.002138,0.001089,-0.002871,-0.000184,0.00102,...,-0.000191,0.001211,0.000365,0.000216,0.000523,-0.000781,0.000307,-0.000166,-0.000133,0.000964
3,-0.001413,-0.002368,-0.000428,-0.000318,-0.000938,0.002347,-0.002138,0.001089,-0.002871,-0.000184,...,0.000589,-0.000191,0.001211,0.000365,0.000216,0.000523,-0.000781,0.000307,-0.000166,-0.000133
4,0.003101,-0.001413,-0.002368,-0.000428,-0.000318,-0.000938,0.002347,-0.002138,0.001089,-0.002871,...,-0.00401,0.000589,-0.000191,0.001211,0.000365,0.000216,0.000523,-0.000781,0.000307,-0.000166


In [274]:
!rm -rf tmp/tensorflow_log/*

import tensorflow as tf
import numpy as np
import random


num_of_input_nodes = 1
num_of_hidden_nodes = 80
num_of_output_nodes = 1
num_of_training_epochs = 500000
batch_size = 100
num_of_prediction_epochs = 100
learning_rate = 0.001
forget_bias = 0.9
num_of_sample = 1000
num_layers = 1

batch_size = 100
sequences_length = num_predictors 
test_num = int(num_of_sample*0.3)
class_num = 2

def get_batch(batch_size, X, t):
    rnum = [random.randint(0, len(X) - 1) for x in range(batch_size)]
    xs = np.array([[[y] for y in list(X[r])] for r in rnum])
    ts = np.array([t[r] for r in rnum])
    return xs, ts


def create_batch(batch_size, X, t):
    X = X.as_matrix()
    t = t.as_matrix()
    rnum = [random.randint(0, len(X) - 1) for x in range(batch_size)]
    xs = np.array([[[y] for y in list(X[r])] for r in rnum])
    ts = np.array([t[r] for r in rnum])
    return xs, ts


def unpack_sequence(tensor):
    return tf.unpack(tf.transpose(tensor, perm=[1, 0, 2]))

def pack_sequence(sequence):
    return tf.transpose(tf.pack(sequence), perm=[1, 0, 2])

def inference(input_ph):
    with tf.name_scope("inference") as scope:
        in_size = num_of_hidden_nodes
        out_size = class_num
        weight = tf.Variable(tf.truncated_normal([in_size, out_size], stddev=0.1))
        bias = tf.Variable(tf.constant(0.1, shape=[out_size]))
        
       
        # network = tf.nn.rnn_cell.LSTMCell(num_of_hidden_nodes)
        network = tf.nn.rnn_cell.GRUCell(num_of_hidden_nodes)
        network = tf.nn.rnn_cell.DropoutWrapper(network, output_keep_prob=0.5)
        network = tf.nn.rnn_cell.MultiRNNCell([network] * num_layers)
        inputs =  unpack_sequence(input_ph)
        
        rnn_output, states_op = tf.nn.rnn(network,inputs,dtype=tf.float32)
        #rnn_output = pack_sequence(rnn_output)
        #state_op = pack_sequence(states_op)
        output_op = tf.nn.softmax(tf.matmul(rnn_output[-1], weight) + bias)

 
        tf.histogram_summary("weights", weight)
        tf.histogram_summary("biases", bias)
        tf.histogram_summary("output",  output_op)
        results = [weight, bias]
        return output_op, states_op, results


def loss(output_op, supervisor_ph):
    with tf.name_scope("loss") as scope:
        loss_op = - tf.reduce_sum(supervisor_ph * tf.log(output_op))
        tf.scalar_summary("loss", loss_op)
        return loss_op


def training(loss_op):
    with tf.name_scope("training") as scope:
        training_op = optimizer.minimize(loss_op)
        return training_op

def accuracy(output_op, supervisor_ph):
    with tf.name_scope("accuracy") as scope:
        correct_prediction = tf.equal(tf.argmax(output_op,1), tf.argmax(supervisor_ph,1))
        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        tf.scalar_summary("accuracy", accuracy_op)
        return accuracy_op

def calc_accuracy(accuracy_opp, X, t):
    inputs, targets = create_batch(len(X), X, t)
    pred_dict = {
        input_ph:  inputs,
        supervisor_ph: targets
    }
    accurecy = sess.run(accuracy_op, feed_dict=pred_dict)
    print(accurecy)



random.seed(0)
np.random.seed(0)
tf.set_random_seed(0)

#optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)

with tf.Graph().as_default():
    input_ph = tf.placeholder(tf.float32, [None, sequences_length, num_of_input_nodes], name="input")
    supervisor_ph = tf.placeholder(tf.float32, [None, class_num], name="supervisor")

    output_op, states_op, datas_op = inference(input_ph)
    loss_op = loss(output_op, supervisor_ph)
    training_op = training(loss_op)
    accuracy_op = accuracy(output_op, supervisor_ph)

    summary_op = tf.merge_all_summaries()
    init = tf.initialize_all_variables()

    with tf.Session() as sess:
        saver = tf.train.Saver()
        summary_writer = tf.train.SummaryWriter("tmp/tensorflow_log", graph=sess.graph)
        sess.run(init)

        for epoch in range(num_of_training_epochs):
            inputs, supervisors = create_batch(batch_size, training_predictors_tf , classes_tf[:training_set_size])
            train_dict = {
                input_ph:   inputs,
                supervisor_ph: supervisors
            }
            sess.run(training_op, feed_dict=train_dict)

            if (epoch) % 1000 == 0:
                summary_str, train_loss = sess.run([summary_op, loss_op], feed_dict=train_dict)
                print("train#{}, loss: {}".format(epoch, train_loss))
                summary_writer.add_summary(summary_str, epoch)
                if (epoch) % 5000 == 0:
                    calc_accuracy(output_op, test_predictors_tf, test_classes_tf)
        calc_accuracy(output_op, X_test, t_test)
        datas = sess.run(datas_op)
        saver.save(sess, "model.ckpt")

train#0, loss: 69.31488037109375
0.467554
train#1000, loss: 69.31159210205078
train#2000, loss: 69.30162048339844
train#3000, loss: 69.32111358642578
train#4000, loss: 69.30297088623047
train#5000, loss: 69.31268310546875
0.522463
train#6000, loss: 69.33447265625
train#7000, loss: 69.32402038574219
train#8000, loss: 69.24475860595703
train#9000, loss: 69.33680725097656
train#10000, loss: 69.31669616699219
0.530782
train#11000, loss: 69.28875732421875
train#12000, loss: 69.30217742919922
train#13000, loss: 69.31776428222656
train#14000, loss: 69.28911590576172
train#15000, loss: 69.28043365478516
0.527454
train#16000, loss: 69.320556640625
train#17000, loss: 69.26954650878906
train#18000, loss: 69.33114624023438
train#19000, loss: 69.31207275390625
train#20000, loss: 69.32240295410156
0.542429
train#21000, loss: 69.42078399658203
train#22000, loss: 69.30817413330078
train#23000, loss: 69.33502960205078
train#24000, loss: 69.29547119140625
train#25000, loss: 69.25830078125
0.52579
train#

KeyboardInterrupt: 

In [225]:
!rm -rf var/tensorboard/*
print("deleted logs")
import tensorflow as tf

keep_prob = 1.0
hidden_num1 = 500
hidden_num2 = 25
learning_rate=0.0005
save_dir = "var/tensorboard"
learning_num = 3000001


num_classes = len(training_classes_tf.columns)
print("num_predictor: {}".format( num_predictors))
print("num_classes: {}".format( num_classes))


def inference(feature_data,  actual_classes):
    with tf.name_scope('hidden1'):
        weights1 = tf.Variable(tf.truncated_normal([num_predictors, hidden_num1], stddev=0.0001))
        biases1 = tf.Variable(tf.ones([hidden_num1]))
        hidden_layer_1 = tf.nn.relu(tf.matmul(feature_data, weights1) + biases1)

    with tf.name_scope("hidden2"):
        weights2 = tf.Variable(tf.truncated_normal([hidden_num1, hidden_num2], stddev=0.0001))
        biases2 = tf.Variable(tf.ones([hidden_num2]))
        hidden_layer_2 = tf.nn.relu(tf.matmul(hidden_layer_1, weights2) + biases2)

    dropout = tf.nn.dropout(hidden_layer_2, keep_prob)

    with tf.name_scope("softmax"):
        weights3 = tf.Variable(tf.truncated_normal([hidden_num2, num_classes], stddev=0.0001))
        biases3 = tf.Variable(tf.ones([num_classes]))
        model = tf.nn.softmax(tf.matmul(dropout, weights3) + biases3)
    return model


with tf.Graph().as_default():
    sess1 = tf.Session()

    feature_data = tf.placeholder("float", [None, num_predictors])
    actual_classes = tf.placeholder("float", [None, num_classes])

    model = inference(feature_data, actual_classes)

    cost = -tf.reduce_sum(actual_classes*tf.log(tf.clip_by_value(model,1e-10,1.0)))
    
    tf.scalar_summary("cross_entropy", cost)

    train_op1 = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    saver = tf.train.Saver()
    init = tf.initialize_all_variables()
    sess1.run(init)
    
    correct_prediction = tf.equal(tf.argmax(model, 1), tf.argmax(actual_classes, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    tf.scalar_summary("accuracy", accuracy)
    
    summary_op = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter(save_dir, sess1.graph)
    f_data = training_predictors_tf.values
    a_classes = training_classes_tf.values.reshape(len(training_classes_tf.values), 2)
    print("Start")
    for i in range(1, learning_num):
        sess1.run(
            train_op1, 
            feed_dict={
              feature_data: f_data, 
              actual_classes: a_classes
            }
        )
        if i%1000 == 0:
            accuracy_value, cost_value = sess1.run([accuracy,cost],
                feed_dict={
                    feature_data: test_predictors_tf.values, 
                    actual_classes: test_classes_tf.values.reshape(len(test_classes_tf.values), 2)
                  }
            )
            print( "{} :{} : {}".format(i, accuracy_value, cost_value))
        
            summary_str = sess1.run(summary_op, 
                feed_dict={
                    feature_data: test_predictors_tf.values, 
                    actual_classes: test_classes_tf.values.reshape(len(test_classes_tf.values), 2)
                }
            )
            summary_writer.add_summary(summary_str, i)
        
save_path = saver.save(sess1, "model.ckpt")
accuracy_value, cost_value = sess1.run([accuracy, cost],
                feed_dict={
                    feature_data: test_predictors_tf.values, 
                    actual_classes: test_classes_tf.values.reshape(len(test_classes_tf.values), 2)
                  }
            )
print( "{} :{} : {}".format("test", accuracy_value, cost_value))

deleted logs
num_predictor: 20
num_classes: 2
Start


KeyboardInterrupt: 