In [1]:
import numpy as np
import random
import matplotlib.pyplot as plt
import math
import tensorflow as tf
%matplotlib inline

In [2]:
# BATCH_SIZE = 128
# CNT_NEURONS = 128
# LEARNING_EPS = 1e-2
# LEARNING_RATE = 0.01
# lr = theano.shared(np.array(LEARNING_RATE, dtype=np.float32))
# CNT_EPOCHS = 200
# CNT_ITERATIONS = 10
# TIME_PERIOD = 60
# EPS = 1e-6

In [3]:
class UtilFunctions:
    
    GRANULARITY = 100
    
    def calculate_sharpe_ratio_from_returns(returns):
        numerator = np.mean(returns)
        denominator = np.std(returns)
        return numerator / (denominator + EPS)


    def calculate_commission(count_bought, price, commission_rate=0.0006):
        return np.abs(count_bought * price * commission_rate)


    def calculate_return(count_before, count_now, price_before, price_now,
                         granularity=1, commission_rate=0.0006):
        ret = count_before * (price_now - price_before)
        commission = calculate_commission(count_bought=count_now - count_before,
                                          price=price_now,
                                          commission_rate=commission_rate)
        return (ret - commission) * granularity


    def calculate_returns(paths, prices, granularity=1, commission_rate=0.0006):
        returns = []
        for time in range(1, prices.shape[1]):
            ret = 0
            for asset in range(0, prices.shape[0]):
                ret += calculate_return(count_before=paths[asset][time - 1],
                                        count_now=paths[asset][time],
                                        price_before=prices[asset][time - 1],
                                        price_now=prices[asset][time],
                                        granularity=granularity,
                                        commission_rate=commission_rate)
            returns.append(ret)
        return np.array(returns)


    def calculate_sharpe_ratio(paths, prices, granularity=GRANULARITY,
                               commission_rate=0.0006):
        returns = calculate_returns(paths, prices, granularity, commission_rate)
        return calculate_sharpe_ratio_from_returns(returns)

    def build_returns_plot(returns, plot_name=None, filename=None):
        cumulative_returns = np.cumsum(returns)
        plt.plot(cumulative_returns)
        plt.xlabel("Time")
        plt.ylabel("Total return")
        if plot_name is not None:
            plt.title(plot_name)
        if filename is None:
            plt.show()
        else:
            plt.savefig(filename)

In [4]:
class State:
    
    @staticmethod
    def get_state(data, prev_states):
        return [data[-4], data[-3], data[-2], data[-1], prev_states[-1]]
    
    @staticmethod
    def dims():
        return 5
    
    @staticmethod
    def data_req():
        return 4

In [5]:
NUM_ACTIONS = 3

n_input = State.dims()
n_hidden_1 = 64
n_hidden_2 = 32
n_hidden_3 = 16
n_out = NUM_ACTIONS

weights = {
    'h1' : tf.Variable(tf.random_uniform([n_input, n_hidden_1], 0, 0.01)),
    'h2' : tf.Variable(tf.random_uniform([n_hidden_1, n_hidden_2], 0, 0.01)),
    'h3' : tf.Variable(tf.random_uniform([n_hidden_2, n_hidden_3], 0, 0.01)),
    'out' : tf.Variable(tf.random_uniform([n_hidden_3, n_out], 0, 0.01))
}


def multilayer_model(x, weights):
    layer_1 = tf.matmul(x, weights['h1'])
    layer_2 = tf.nn.relu(tf.matmul(layer_1, weights['h2']))
    layer_3 = tf.matmul(layer_2, weights['h3'])
#     out_layer = tf.nn.softmax(tf.matmul(layer_3, weights['out']))
    out_layer = tf.matmul(layer_3, weights['out'])
    return out_layer


inputs = tf.placeholder(shape=[None, n_input], dtype=tf.float32)

q_fn = multilayer_model(inputs, weights)

next_q_fn = tf.placeholder(shape=[None, n_out], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(next_q_fn - q_fn))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)

init = tf.global_variables_initializer()

In [6]:
def act(action, data, step, prev_states):
    d_data = data[step] - data[step - 1]
    d_state = action - 1 # -1, 0, +1
    state = prev_states + d_state
    reward = state * data[step] - prev_states[-1] * data[step - 1]
    return state, reward

In [7]:
discount_factor = 0.9
exploration_rate = 0.1
NUM_EPOCHS = 300

In [8]:
def run_learning(sess, data, num_epochs=NUM_EPOCHS):
    reward_per_epoch = []
    final_state_per_epoch = []

    for _ in range(num_epochs):
        history = []
        reward_all = 0
        prev_states = [0]
        steps = range(State.data_req(), len(data))
        for step in steps:
            state_vector = [State.get_state(data[:step], prev_states)]

            q = sess.run(q_fn, feed_dict={inputs: state_vector})

            # Exploration
            if np.random.random() < exploration_rate:
                action = [np.random.randint(0, NUM_ACTIONS)]
            else:
                action = np.argmax(q, axis=1)

            state, reward = act(action[0], data, step, prev_states)

            prev_states.append(state)

            new_state_vector = [State.get_state(data[:step + 1], prev_states)]
            new_q = sess.run(q_fn, feed_dict={inputs: new_state_vector})
            max_new_q = np.max(new_q)

            target_q = q
            target_q[0][action[0]] = reward + discount_factor * max_new_q
            
            history.append([*state_vector, *target_q])
        
        state_vectors = [record[0] for record in history]
        target_qs = [record[1] for record in history]
        _ = sess.run([optimizer], feed_dict={inputs: state_vectors, next_q_fn: target_qs})


        final_state_per_epoch.append(prev_states[-1])

    return final_state_per_epoch

In [9]:
import pandas as pd
from os import listdir
def read_data(data_path):
    files = list(filter(lambda x : x.endswith('.csv'), listdir(data_path)))
    prices_per_day = []
    for i in range(len(files)):
        input_data = pd.read_csv(data_path + files[i], sep=';')
        day_prices = np.array(input_data['Open'])
        prices_per_day.append(day_prices)
    return np.array(prices_per_day)

In [10]:
NUM_RUNS = 10

In [11]:
prices_per_day = read_data('data/train/')

mean_income_per_day_per_run = []
with tf.Session() as sess:
    sess.run(init)
    for _ in range(NUM_RUNS):
        mean_final_state_per_day = []
        for prices in prices_per_day:
            final_states_per_epoch = run_learning(sess, prices)
            mean_final_state_per_day.append(np.mean(final_states_per_epoch))
        mean_income_per_day_per_run.append(mean_final_state_per_day)

In [12]:
mean_income_per_day_per_run = np.array(mean_income_per_day_per_run)

In [13]:
results_list = [("d_{}".format(i), mean_income_per_day_per_run[:, i]) for i in range(len(prices_per_day))]
results_dict = dict(results_list) 
df = pd.DataFrame.from_dict(results_dict)

In [14]:
df["mean"] = np.mean(mean_income_per_day_per_run, axis=1)

In [15]:
df

Unnamed: 0,d_0,d_1,d_10,d_11,d_12,d_13,d_14,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,mean
0,0.129633,0.5793,0.206333,-0.0182,1.115733,2.1115,2.0692,2.1288,0.1892,0.040233,0.5282,0.4813,2.267467,1.146367,0.231233,0.88042
1,0.2751,0.6814,0.4636,0.049533,1.1568,-0.399633,2.247033,2.333133,0.225333,-0.0683,0.020433,0.316667,2.669367,1.167367,0.214467,0.75682
2,-0.320967,0.682333,0.498367,-0.031233,1.2555,-2.238567,2.182033,-2.3687,-0.281367,0.248567,0.522067,-0.519467,-2.781633,1.166133,-0.273733,-0.150711
3,-0.3105,0.694833,0.4116,0.028433,1.154267,-2.316333,2.170033,-2.325333,-0.250667,0.150633,0.497067,-0.451167,-2.7538,1.2507,-0.327833,-0.158538
4,-0.289333,0.629767,0.465333,0.0623,1.132,-2.2871,2.228367,-2.273667,-0.271433,0.1614,0.567667,-0.4609,-2.6912,1.186133,-0.249133,-0.13932
5,-0.298867,0.664967,0.4444,0.078267,1.199667,-2.2813,2.216567,-2.2782,-0.272667,0.207,0.552433,-0.429933,-2.7997,1.192933,-0.221933,-0.135091
6,-0.369067,0.671233,0.450033,0.0117,1.1252,-2.2516,2.2836,-2.275033,-0.191867,0.215867,0.546567,-0.475,-2.8227,1.234667,-0.299367,-0.143051
7,-0.204267,0.6473,0.433067,0.0316,1.155567,-2.2349,2.2142,-2.3245,-0.2095,0.224067,0.578833,-0.498867,-2.694833,1.231367,-0.222667,-0.124902
8,-0.340967,0.688867,0.486767,-0.0084,1.185733,-2.2577,2.227133,-2.3255,-0.2883,0.173667,0.493233,-0.505333,-2.7205,1.285433,-0.308333,-0.147613
9,-0.268133,0.6096,0.428033,0.068667,1.182267,-2.2443,2.200067,-2.350067,-0.2533,0.249467,0.467933,-0.471933,-2.6038,1.214333,-0.1955,-0.131111
