In [11]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import pandas as pd
import numpy as np
import MAIN.Basics as basics
import MAIN.Reinforcement as RL

import seaborn as sns
import matplotlib.pyplot as plt
from UTIL import FileIO
from STRATEGY.Benchmark import Benchmark
import statsmodels

In [12]:
import MAIN.Basics as basics

# Load the training configuration from its YAML file; the state and action
# spaces are attached to this object before the learner is constructed.
config_path = 'CONFIG/config_train.yml'
config_train = FileIO.read_yaml(config_path)

In [16]:
# Load the two raw price files, then let Benchmark.clean_data prepare the pair
# using the 'date' and 'close' columns.
raw_x, raw_y = (
    pd.read_csv(price_file)
    for price_file in ('STATICS/PRICE/rya.csv', 'STATICS/PRICE/ryaay.csv')
)
x, y = Benchmark.clean_data(raw_x, raw_y, 'date', 'close')

In [None]:


# Separate training and testing sets: the first `train_pct` of the rows are
# used for training, the remainder for testing (chronological split, no shuffle).
# NOTE(review): `EGCointegration` is not imported in any visible cell of this
# notebook; on a fresh kernel this cell raises NameError. Add the import to the
# first cell (presumably from the STRATEGY package - confirm the module path).
train_pct = 0.7
train_len = round(len(x) * train_pct)
idx_train = list(range(0, train_len))
idx_test  = list(range(train_len, len(x)))
EG_Train = EGCointegration(x.iloc[idx_train, :], y.iloc[idx_train, :], 'date', 'close')
EG_Test  = EGCointegration(x.iloc[idx_test,  :], y.iloc[idx_test,  :], 'date', 'close')

# Create action space: each key is one tunable parameter and each list holds
# the candidate values the learner may pick for it.
n_hist    = list(np.arange(600, 1201, 300))
n_forward = list(np.arange(120, 1201, 240))
trade_th  = list(np.arange(1,  3.1, 2))
stop_loss = list(np.arange(1,  2.1, 1))
cl        = list(np.arange(0.05,  0.06, 0.05))
actions   = {'n_hist':    n_hist,
             'n_forward': n_forward,
             'trade_th':  trade_th,
             'stop_loss': stop_loss,
             'cl':        cl}

# Total number of discrete actions = size of the Cartesian product of all
# candidate lists. np.prod is used because np.product is a deprecated alias
# that no longer exists in NumPy 2.x.
n_action  = int(np.prod([len(candidates) for candidates in actions.values()]))


In [None]:

# State space: a single state variable (the transaction cost) with one level.
# n_state counts the state variables (keys of `states`), not their values.
transaction_cost = [0.001]
states = dict(transaction_cost=transaction_cost)
n_state = len(states)

# Attach both spaces to the training configuration.
config_train['StateSpaceState'] = states
config_train['ActionSpaceAction'] = actions

# Layer specifications consumed by basics.Network. Each spec names the layer
# function, the argument that receives the previous layer's output, and the
# remaining keyword parameters for that function.
one_hot_para = {'indices': None,
                'depth': n_state}
one_hot = {'one_hot': {'func_name': 'one_hot',
                       'input_arg': 'indices',
                       'layer_para': one_hot_para}}

final_para = {'inputs': None,
              'num_outputs': n_action,
              'biases_initializer': None,
              'activation_fn': tf.nn.relu,
              'weights_initializer': tf.ones_initializer()}
output_layer = {'final': {'func_name': 'fully_connected',
                          'input_arg': 'inputs',
                          'layer_para': final_para}}

# The network input is a single integer state index.
state_in = tf.placeholder(dtype=tf.int32, shape=[1])

# Assemble the network: one-hot layer followed by one copy of the output layer.
N = basics.Network(state_in)
N.build_layers(one_hot)
N.add_layer_duplicates(output_layer, 1)

# Create the learning object on the training data (training runs in a later cell).
RL_Train = RL.ContextualBandit(N, config_train, EG_Train)



In [None]:
# Open the TensorFlow session used for the rest of the notebook and run
# training without saving or restoring a checkpoint.
sess = tf.Session()
RL_Train.process(
    sess,
    restore=False,
    save=False,
)


In [None]:


# Extract training results: the action chosen and the reward received at each
# recorded training step.
action = RL_Train.recorder.record['NETWORK_ACTION']
reward = RL_Train.recorder.record['ENGINE_REWARD']
print(np.mean(reward))

# Average reward per distinct action.
df1 = pd.DataFrame()
df1['action'] = action
df1['reward'] = reward
mean_reward = df1.groupby('action').mean()

# describe() was previously a bare mid-cell expression, so its result was
# silently discarded; print it so the summary actually appears in the output.
print(mean_reward.describe())

# Distribution of the per-action mean rewards.
sns.distplot(mean_reward)


In [None]:
# Evaluate the trained network once, take the index of its largest output as
# the chosen action, and convert that index into a parameter dictionary.
network_output, = sess.run([RL_Train.output], feed_dict=RL_Train.feed_dict)
opt_action = np.argmax(network_output)
action_dict = RL_Train.action_space.convert(opt_action, 'index_to_dict')



In [None]:
# Values passed one by one as `index` to EG_Test.process in the back-test loop.
indices = range(2500, 4000)

# P&L ledger with one row per EG_Test timestamp. The monetary columns start as
# float zeros because fractional costs and profits are added to them later;
# starting from integer 0 would force a dtype change on the first update.
pnl = pd.DataFrame()
pnl['Time'] = EG_Test.timestamp
pnl['Trade_Profit'] = 0.0
pnl['Cost'] = 0.0
pnl['N_Trade'] = 0

# NOTE(review): this hard-coded dictionary replaces the `action_dict` derived
# from the trained network in the preceding cell, and its values (e.g.
# n_hist=400, cl=0.1) are not in the action space defined for training.
# Confirm the override is intentional.
action_dict = {'n_hist': 400,
               'n_forward': 800,
               'trade_th': 2,
               'stop_loss': 10,
               'cl': 0.1}

In [None]:

import warnings

# The monetary accumulators receive fractional amounts below; make sure they
# are float so the in-place additions never need a dtype change.
pnl = pnl.astype({'Trade_Profit': float, 'Cost': float})

# Run the strategy at every index and book the resulting trades into `pnl`.
# Warnings are silenced only for the duration of the loop; catch_warnings
# restores the previous filters even if the loop raises.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    for i in indices:
        if i % 100 == 0:
            print(i)  # coarse progress indicator

        # NOTE(review): transaction_cost is hard-coded to 0.006 here while the
        # training state space uses 0.001 - confirm this is intended.
        EG_Test.process(index=i, transaction_cost=0.006, **action_dict)
        trade_record = EG_Test.record
        if trade_record is None or len(trade_record) == 0:
            continue

        trade_record = pd.DataFrame(trade_record)
        by_open = trade_record.groupby('trade_time')
        by_close = trade_record.groupby('close_time')

        # Costs and net position opened, keyed by trade time
        trade_cost = by_open['trade_cost'].sum()
        open_pos = by_open['long_short'].sum()

        # Costs, realised profit and net position unwound, keyed by close time
        close_cost = by_close['close_cost'].sum()
        profit = by_close['profit'].sum()
        close_pos = by_close['long_short'].sum() * -1

        # Rows of the ledger touched by this batch of trades.
        # NOTE(review): the positional `.values` additions assume every grouped
        # timestamp occurs exactly once in pnl['Time'] and that pnl is ordered
        # like the (sorted) group keys - TODO confirm.
        open_rows = pnl['Time'].isin(trade_cost.index)
        close_rows = pnl['Time'].isin(close_cost.index)

        # Single-step .loc[row_mask, column] writes straight into `pnl`; the
        # former pnl[column].loc[row_mask] form is chained indexing, which may
        # update a temporary copy instead of the frame.
        pnl.loc[open_rows, 'Cost'] += trade_cost.values
        pnl.loc[close_rows, 'Cost'] += close_cost.values
        pnl.loc[close_rows, 'Trade_Profit'] += profit.values
        pnl.loc[open_rows, 'N_Trade'] += open_pos.values
        pnl.loc[close_rows, 'N_Trade'] += close_pos.values

warnings.filterwarnings(action='once')

# Cumulative net P&L (profit minus costs) over the test timeline
pnl['PnL'] = (pnl['Trade_Profit'] - pnl['Cost']).cumsum()

In [None]:

# Plot y - beta * x for positions 2500 to 3999 of the test set (the same range
# as `indices`). The original line was missing its closing parenthesis and
# indexed with the tuple [2500, 4000] instead of the slice 2500:4000.
plt.plot((EG_Test.y - EG_Test.x * EG_Test.beta)[2500:4000])

In [None]:
# Length of EG_Test.y - useful to check against the hard-coded 2500-4000 window
len(EG_Test.y)

In [None]:
# Summary statistics of the cumulative P&L. Printed explicitly because a bare
# expression that is not the last line of a cell produces no output.
print(pnl['PnL'].describe())

# Cumulative P&L and net open position over the whole test timeline.
# NOTE(review): both series share one y-axis although their scales may differ.
plt.plot(pnl['PnL'], label='PnL')
plt.plot(pnl['N_Trade'], label='N_Trade')
plt.legend()
plt.show()


In [None]:
# Summary statistics of the cumulative P&L. Printed explicitly because a bare
# expression that is not the last line of a cell produces no output.
# NOTE(review): this describes the full series, not the zoomed window below -
# confirm which one is wanted.
print(pnl['PnL'].describe())

# Zoom on rows 3000-3999; the window is defined once so both lines stay aligned.
zoom = slice(3000, 4000)
plt.plot(pnl['PnL'][zoom], label='PnL')
plt.plot(pnl['N_Trade'][zoom], label='N_Trade')
plt.legend()
plt.show()


In [None]:
# Preview the first rows of the P&L ledger
pnl.head()

In [None]:
# Close the TensorFlow session created for training and release its resources
sess.close()