In [1]:
%matplotlib inline
import pandas as pd
import numpy as np

import fxtool as ft


In [2]:
def get_candle_list(data, rate_types=["Open", "Close"]):
    """Return the per-row log ratio between two rate columns.

    For each row the value is ``np.log(second / first)``, where ``first`` is
    taken from ``data[rate_types[0]]`` and ``second`` from
    ``data[rate_types[1]]``.

    Args:
        data: Object indexable by the names in ``rate_types`` (for example a
            DataFrame or a dict of sequences); each looked-up entry must be
            iterable.
        rate_types: Sequence whose first element names the denominator column
            and whose second element names the numerator column.

    Returns:
        list: One ``np.log`` result per row pair, in iteration order. The
        length is that of the shorter column, because the columns are zipped.
    """
    start_col, end_col = rate_types[0], rate_types[1]
    return [
        np.log(end_rate / start_rate)
        for start_rate, end_rate in zip(data[start_col], data[end_col])
    ]


In [4]:
# Identifiers handed to ft.read_csv, in descending order.
# NOTE(review): they look like YYYYMM month keys (2015-06 back to 2014-10);
# confirm how fxtool maps them to files.
data = ft.read_csv(
    [
        "201506", "201505", "201504",
        "201503", "201502", "201501",
        "201412", "201411", "201410",
    ]
)

201506
201505
201504
201503
201502
201501
201412
201411
201410


In [6]:
# Build the OHLC frame through fxtool with the '1H' argument
# (presumably hourly bars -- confirm in fxtool).
ohlc = ft.get_ohlc(data, '1H')

# Bar-to-bar differences of the Close and Open columns. The first row of each
# is NaN because diff() has no previous row to subtract.
for price_col in ("Close", "Open"):
    ohlc["Change({})".format(price_col)] = ohlc[price_col].diff()

# Within-bar log ratio log(Close / Open), one value per row.
ohlc["Change(Open-Close)"] = get_candle_list(ohlc)

ohlc.describe()



Unnamed: 0,Open,High,Low,Close,Change(Close),Change(Open),Change(Open-Close)
count,4620.0,4620.0,4620.0,4620.0,4619.0,4619.0,4620.0
mean,118.274402,118.368125,118.178516,118.277839,0.002766,0.002785,2.9e-05
std,4.358831,4.354997,4.362955,4.357811,0.138958,0.139632,0.001167
min,105.674,105.805,105.18,105.676,-1.544,-1.543,-0.012467
25%,117.695,117.792,117.57275,117.69775,,,-0.000483
50%,119.2615,119.355,119.173,119.263,,,3.3e-05
75%,120.29975,120.383,120.222,120.30225,,,0.000556
max,125.681,125.852,125.612,125.68,1.331,1.548,0.013126


In [7]:
# One-hot direction flags for each bar. A change >= 0 counts as positive and a
# change < 0 as negative; a NaN change fails both comparisons, so both flags
# stay 0.
candle_change = ohlc['Change(Open-Close)']
ohlc['change_positive'] = (candle_change >= 0).astype(int)
ohlc['change_negative'] = (candle_change < 0).astype(int)

num_predictors = 10

# First row turned into a sample. This used to be written `4 + i`, where `i`
# was the value left over from the column-name loop (== num_predictors). The
# same start row is kept, but stated explicitly so the cell no longer depends
# on a leaked loop variable.
first_sample_row = num_predictors + 4

# Column order matters downstream: the two class flags come first, followed by
# the lagged changes change_1 .. change_<num_predictors>.
data_columns = ['change_positive', 'change_negative'] + [
    "change_" + str(lag) for lag in range(1, num_predictors + 1)
]

# Plain arrays give positional access without the removed `.ix` indexer.
positive_flags = ohlc['change_positive'].values
negative_flags = ohlc['change_negative'].values
change_values = candle_change.values

# Collect rows in a list and build the frame once. Appending to a DataFrame
# inside the loop copies the whole frame on every iteration, and
# DataFrame.append no longer exists in pandas 2.x.
records = []
for i in range(first_sample_row, len(ohlc)):
    data_dic = {
        "change_positive": positive_flags[i],
        "change_negative": negative_flags[i],
    }
    # change_j is the within-bar change observed j rows before row i.
    for j in range(1, num_predictors + 1):
        data_dic["change_" + str(j)] = change_values[i - j]
    records.append(data_dic)

    if i % 1000 == 0:
        print("{}".format(i))

# Cast to float so the class flags display as 0.0 / 1.0, as they did when the
# frame was grown by row-wise append.
training_test_data = pd.DataFrame(records, columns=data_columns).astype(float)

training_test_data.head()

1000
2000
3000
4000


Unnamed: 0,change_positive,change_negative,change_1,change_2,change_3,change_4,change_5,change_6,change_7,change_8,change_9,change_10
0,0.0,1.0,0.000519,0.000829,-0.000501,-0.000319,0.000209,-1.8e-05,-0.001065,0.000491,-0.000373,0.000601
1,0.0,1.0,-0.002606,0.000519,0.000829,-0.000501,-0.000319,0.000209,-1.8e-05,-0.001065,0.000491,-0.000373
2,1.0,0.0,-0.00084,-0.002606,0.000519,0.000829,-0.000501,-0.000319,0.000209,-1.8e-05,-0.001065,0.000491
3,0.0,1.0,0.000657,-0.00084,-0.002606,0.000519,0.000829,-0.000501,-0.000319,0.000209,-1.8e-05,-0.001065
4,0.0,1.0,-0.002485,0.000657,-0.00084,-0.002606,0.000519,0.000829,-0.000501,-0.000319,0.000209,-1.8e-05


In [8]:
# Column layout of training_test_data: the first two columns are the class
# flags, every remaining column is a predictor.
class_columns = training_test_data.columns[:2]
predictor_columns = training_test_data.columns[2:]

predictors_tf = training_test_data[predictor_columns]
classes_tf = training_test_data[class_columns]

# The first 80% of rows (in stored order, no shuffling) form the training set;
# the remainder is the test set.
total_rows = len(training_test_data)
training_set_size = int(total_rows * 0.8)
test_set_size = total_rows - training_set_size

training_predictors_tf = predictors_tf.iloc[:training_set_size]
training_classes_tf = classes_tf.iloc[:training_set_size]

test_predictors_tf = predictors_tf.iloc[training_set_size:]
test_classes_tf = classes_tf.iloc[training_set_size:]

training_predictors_tf.describe()

Unnamed: 0,change_1,change_2,change_3,change_4,change_5,change_6,change_7,change_8,change_9,change_10
count,3684.0,3684.0,3684.0,3684.0,3684.0,3684.0,3684.0,3684.0,3684.0,3684.0
mean,2.8e-05,2.8e-05,2.7e-05,2.7e-05,2.8e-05,2.8e-05,2.7e-05,2.7e-05,2.7e-05,2.7e-05
std,0.001193,0.001193,0.001193,0.001193,0.001193,0.001193,0.001193,0.001193,0.001193,0.001193
min,-0.009021,-0.009021,-0.009021,-0.009021,-0.009021,-0.009021,-0.009021,-0.009021,-0.009021,-0.009021
25%,-0.000504,-0.000504,-0.000504,-0.000504,-0.000504,-0.000504,-0.000504,-0.000504,-0.000504,-0.000504
50%,2.6e-05,2.6e-05,2.6e-05,2.6e-05,2.6e-05,2.6e-05,2.6e-05,2.6e-05,2.6e-05,2.6e-05
75%,0.00058,0.000582,0.00058,0.00058,0.00058,0.00058,0.00058,0.00058,0.000578,0.00058
max,0.013126,0.013126,0.013126,0.013126,0.013126,0.013126,0.013126,0.013126,0.013126,0.013126


In [None]:
import tensorflow as tf

# Hyper-parameters and run configuration.
keep_prob = 1.0          # keep probability for dropout; 1.0 keeps every unit
hidden_num1 = 50         # units in the first hidden layer
hidden_num2 = 25         # units in the second hidden layer
learning_rate=0.0005     # Adam learning rate
save_dir = "/share/var/tensorboard"  # where summaries are written
num_steps = 3000000      # full-batch training steps
report_every = 5000      # print metrics / write a summary every N steps


num_classes = len(training_classes_tf.columns)
print("num_predictor: {}".format( num_predictors))
print("num_classes: {}".format( num_classes))



with tf.Graph().as_default():
    # The session is deliberately left open after the cell finishes so that it
    # can still be used afterwards (e.g. by the commented-out saver below).
    sess1 = tf.Session()

    # Inputs: one row of lagged changes per sample, and its one-hot class.
    feature_data = tf.placeholder("float", [None, num_predictors])
    actual_classes = tf.placeholder("float", [None, num_classes])

    # Two ReLU hidden layers followed by a softmax output layer. All weights
    # start as tiny truncated-normal values and all biases start at one.
    with tf.name_scope('hidden1'):
        weights1 = tf.Variable(tf.truncated_normal([num_predictors, hidden_num1], stddev=0.0001))
        biases1 = tf.Variable(tf.ones([hidden_num1]))
        hidden_layer_1 = tf.nn.relu(tf.matmul(feature_data, weights1) + biases1)

    with tf.name_scope("hidden2"):
        weights2 = tf.Variable(tf.truncated_normal([hidden_num1, hidden_num2], stddev=0.0001))
        biases2 = tf.Variable(tf.ones([hidden_num2]))
        hidden_layer_2 = tf.nn.relu(tf.matmul(hidden_layer_1, weights2) + biases2)

    dropout = tf.nn.dropout(hidden_layer_2, keep_prob)

    with tf.name_scope("softmax"):
        weights3 = tf.Variable(tf.truncated_normal([hidden_num2, num_classes], stddev=0.0001))
        biases3 = tf.Variable(tf.ones([num_classes]))
        model = tf.nn.softmax(tf.matmul(dropout, weights3) + biases3)

    # Summed cross-entropy; probabilities are clipped away from 0 so that
    # log() never produces -inf.
    cost = -tf.reduce_sum(actual_classes*tf.log(tf.clip_by_value(model,1e-10,1.0)))

    train_op1 = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    init = tf.initialize_all_variables()
    sess1.run(init)
    
    # Fraction of samples whose most probable class matches the label.
    correct_prediction = tf.equal(tf.argmax(model, 1), tf.argmax(actual_classes, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    tf.scalar_summary("cross_entropy", cost)
    tf.scalar_summary("accuracy", accuracy)
    
    summary_op = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter(save_dir, sess1.graph)

    # Every step trains and reports on the same full training batch, so the
    # feed dict is built once instead of on each of the millions of iterations.
    training_feed = {
        feature_data: training_predictors_tf.values,
        actual_classes: training_classes_tf.values.reshape(
            len(training_classes_tf.values), num_classes
        ),
    }

    try:
        for i in range(1, num_steps + 1):
            sess1.run(train_op1, feed_dict=training_feed)

            if i % report_every == 0:
                # One forward pass yields both the printed metrics and the
                # serialized summary; no training step runs in between, so
                # the values are the same as with separate evaluations.
                accuracy_value, cost_value, summary_str = sess1.run(
                    [accuracy, cost, summary_op],
                    feed_dict=training_feed
                )
                print( "{} :{} : {}".format(i, accuracy_value, cost_value))
                summary_writer.add_summary(summary_str, i)
    finally:
        # Push buffered summaries to disk even when the long loop is
        # interrupted from the notebook.
        summary_writer.flush()
        
#saver = tf.train.Saver()
#save_path = saver.save(sess1, "model.ckpt")

num_predictor: 10
num_classes: 2


In [11]:
# Hand-recorded result line kept as a bare string so the cell displays it.
# The layout matches the "step :accuracy : cost" progress line printed by the
# training loop, with the learning rate appended.
# NOTE(review): this note says learn_rate=0.001 while the training cell sets
# learning_rate=0.0005 -- confirm which run this line came from.
"3000000 :0.6172638535499573 : 2145.8037109375, learn_rate=0.001"

'3000000 :0.6172638535499573 : 2145.8037109375'