In [3]:
from env_generator import make_env
train_env = make_env()
from rl.agents.ddpg import DDPGAgent
import numpy as np
import gym
%matplotlib notebook
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, \
Concatenate, Conv3D, MaxPooling2D, Conv2D
from keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

# Number of consecutive observations per sample; used below for the model
# input shapes and for the replay memory's window_length.
WINDOW_LENGTH = 5

# Length of the action vector, taken from the first entry of the
# environment's action-space shape.
nb_actions = train_env.action_space.shape[0]
# Second entry of the environment's state_dim, minus 1 and minus 2.
# NOTE(review): not referenced by any cell visible here -- confirm it is
# still needed.
stride_time = train_env.state_dim[1] - 1 - 2

# ------------------ Actor Model ------------------
# Sequential network that takes a stack of WINDOW_LENGTH observations and
# ends in a 5-unit softmax layer.
actor = Sequential()
# Only this first layer is given data_format="channels_first"; the Conv3D
# layers after it use the Keras default. In the recorded summary the second
# layer has 3,940 parameters (1*1*49*4*20 + 20), i.e. it treats the last
# axis (size 4) as channels.
# NOTE(review): confirm that mixing data formats is intentional.
actor.add(Conv3D(2, kernel_size=(1, 3, 1),
                 input_shape=(WINDOW_LENGTH,) + train_env.observation_space.shape,
                 activation='relu', data_format="channels_first"))
# The kernel extent 49 is hard-coded; the recorded summary shows the
# matching axis shrinking from 49 to 1 here.
# NOTE(review): this will break if the observation shape changes.
actor.add(Conv3D(20, kernel_size=(1, 1, 49)))
actor.add(Conv3D(1, kernel_size=(1, 1, 1)))
actor.add(Flatten())
# NOTE(review): the output width 5 is hard-coded; presumably it must equal
# nb_actions -- confirm against the environment.
actor.add(Dense(5, activation='softmax'))
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
actor.summary()

# ------------------ Critic Model ------------------
# Functional-API network with two inputs (action vector and observation
# stack) and a single linear output unit.

action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(
    shape=(WINDOW_LENGTH,) + train_env.observation_space.shape,
    name='observation_input')

# Same convolutional stack as the actor. The layer is applied directly to
# observation_input, which already fixes the input shape, so no separate
# input_shape argument is passed to Conv3D.
# Only this first layer is given data_format="channels_first"; the later
# Conv3D layers use the Keras default.
# NOTE(review): confirm that mixing data formats is intentional.
x = Conv3D(2, kernel_size=(1, 3, 1), activation='relu',
           data_format="channels_first")(observation_input)
# NOTE(review): kernel extent 49 is hard-coded and tied to the current
# observation shape.
x = Conv3D(20, kernel_size=(1, 1, 49))(x)
x = Conv3D(1, kernel_size=(1, 1, 1))(x)

flattened_observation = Flatten()(x)

# Join the action vector with the flattened observation features and map
# the result to one linear output.
x = Concatenate()([action_input, flattened_observation])
x = Dense(1, activation='linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
critic.summary()

# Replay memory capped at 1000 entries, using the same window length as the
# model inputs.
memory = SequentialMemory(limit=1000, window_length=WINDOW_LENGTH)

# Ornstein-Uhlenbeck random process with one component per action dimension,
# handed to the agent as its random_process.
random_process = OrnsteinUhlenbeckProcess(
    size=nb_actions,
    theta=.15,
    mu=0.,
    sigma=.3,
)

# Wire actor, critic, memory and noise into the DDPG agent. Both warm-up
# settings are 100 steps; the recorded training log shows no loss values for
# the first 100-step episode.
agent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    nb_steps_warmup_critic=100,
    nb_steps_warmup_actor=100,
    random_process=random_process,
    gamma=.99,
    target_model_update=1e-3,
)

# Adam optimizer with gradient-norm clipping; mean absolute error is tracked
# as an extra metric.
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Train the agent on the training environment for 1000 steps, with each
# episode capped at 100 steps. Visualization is switched on for
# demonstration, which slows training considerably (the recorded log shows
# roughly one step per second). Interrupting the kernel (Ctrl + C) stops
# training early.
agent.fit(
    train_env,
    nb_steps=1000,
    visualize=True,
    verbose=2,
    nb_max_episode_steps=100,
)

# Persist the trained weights, replacing any existing files. The directory
# listing recorded later in this notebook shows separate "_actor" and
# "_critic" files for this base name.
agent.save_weights('ddpg_{}_weights.h5f'.format("abc"), overwrite=True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_13 (Conv3D)           (None, 2, 6, 49, 4)       32        
_________________________________________________________________
conv3d_14 (Conv3D)           (None, 2, 6, 1, 20)       3940      
_________________________________________________________________
conv3d_15 (Conv3D)           (None, 2, 6, 1, 1)        21        
_________________________________________________________________
flatten_5 (Flatten)          (None, 12)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 5)                 65        
Total params: 4,058
Trainable params: 4,058
Non-trainable params: 0
_________________________________________________________________
None
__________________________________________________________________________________________________
Layer (type)                    Outp

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

 100/1000: episode: 1, duration: 28.709s, episode steps: 100, steps per second: 3, episode reward: -0.204, mean reward: -0.002 [-0.354, 0.134], mean action: 0.264 [-0.136, 0.766], mean observation: 0.610 [-0.430, 5.722], loss: --, mean_absolute_error: --, mean_q: --
 200/1000: episode: 2, duration: 108.211s, episode steps: 100, steps per second: 1, episode reward: -0.074, mean reward: -0.001 [-0.153, 0.060], mean action: 0.252 [-0.632, 1.452], mean observation: 0.553 [-0.430, 1.028], loss: 0.013132, mean_absolute_error: 0.118417, mean_q: 0.461097
 300/1000: episode: 3, duration: 114.136s, episode steps: 100, steps per second: 1, episode reward: 0.217, mean reward: 0.002 [-0.158, 0.296], mean action: 0.062 [-0.632, 1.181], mean observation: 0.650 [-0.430, 4.936], loss: 0.007648, mean_absolute_error: 0.085386, mean_q: 0.505387
 400/1000: episode: 4, duration: 118.533s, episode steps: 100, steps per second: 1, episode reward: -0.285, mean reward: -0.003 [-0.236, 0.221], mean action: 0.151

In [9]:
# List the working directory to check which weight files are available.
# The explicit %ls magic is used instead of a bare `ls`, which depends on
# IPython's automagic and stops working if automagic is disabled or a
# variable named `ls` is defined.
%ls

 Volume in drive C has no label.
 Volume Serial Number is 0009-9E97

 Directory of C:\Users\ThinkPad\Documents\DSBA\T2\Advanced ML\Reinforcement-learning-on-portfolio-management

2018/03/29  20:08    <DIR>          .
2018/03/29  20:08    <DIR>          ..
2018/03/29  17:50    <DIR>          .ipynb_checkpoints
2018/03/29  17:50    <DIR>          __pycache__
2018/03/29  17:47    <DIR>          data
2018/03/29  19:49            35,288 ddpg_abc_weights_actor.h5f
2018/03/29  19:49            37,840 ddpg_abc_weights_critic.h5f
2018/03/29  18:01             2,698 ddpg_model.py
2018/03/29  20:03            35,160 ddpg_trial1_weights_actor.h5f
2018/03/29  20:03            37,848 ddpg_trial1_weights_critic.h5f
2018/03/29  17:47             3,513 env_generator.py
2018/03/29  18:20           120,500 log.txt
2018/03/29  17:47    <DIR>          management
2018/03/29  17:47    <DIR>          model
2018/03/29  17:47            30,064 model_actor
2018/03/29  17:47           661,616 model_critic
2018/03

In [10]:
# Restore previously saved weights into the agent built above. The directory
# listing recorded above shows "ddpg_trial1_weights_actor.h5f" and
# "ddpg_trial1_weights_critic.h5f"; presumably load_weights derives those
# two names from this base path -- confirm against keras-rl.
agent.load_weights("ddpg_trial1_weights.h5f")

In [13]:
import pandas as pd
# Read the test frame stored under key 'test' in the HDF file (path is
# relative to the notebook's working directory).
df_test = pd.read_hdf("data/df_test.hf", key='test')
# Show the first rows as a quick sanity check of the loaded frame.
df_test.head()

Pair,BTC,BTC,BTC,BTC,BTC,BTC,ETH,ETH,ETH,ETH,...,LTC,LTC,LTC,LTC,XRP,XRP,XRP,XRP,XRP,XRP
Price,close,high,low,open,volume,quoteVolume,close,high,low,open,...,low,open,volume,quoteVolume,close,high,low,open,volume,quoteVolume
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-01-14 06:00:00,13868.835938,13915.0,13739.040039,13810.033203,747935.375,54.114883,1366.0,1368.528442,1336.0,1338.050049,...,245.0,246.014999,43207.84375,175.740982,1.904,1.916547,1.859197,1.859197,506813.125,267421.46875
2018-01-14 06:30:00,13853.681641,13876.035156,13805.838867,13874.628906,344971.5,24.921087,1352.953979,1366.0,1342.0,1366.0,...,245.811615,247.665237,41650.449219,168.823837,1.893,1.908461,1.893,1.904515,124487.75,65473.511719
2018-01-14 07:00:00,13776.931641,13869.603516,13706.669922,13853.681641,760802.75,55.233398,1346.450562,1354.900024,1339.539185,1354.900024,...,243.554993,245.811615,87060.664062,355.461975,1.888,1.89998,1.86146,1.893,180365.703125,95928.953125
2018-01-14 07:30:00,13727.0,13828.665039,13665.0,13776.931641,624742.1875,45.52787,1339.124146,1349.183838,1331.0,1346.450562,...,242.550003,245.080002,46777.144531,192.310944,1.874721,1.892209,1.86153,1.888,136563.390625,72880.34375
2018-01-14 08:00:00,13772.0,13812.0,13709.759766,13727.0,538907.625,39.156342,1355.361572,1355.692505,1339.124146,1342.0,...,243.514679,243.514679,113351.40625,461.62558,1.9,1.9,1.87,1.872,159200.25,84280.351562


In [15]:
# Build an environment from the test frame and run the agent on it with
# visualization enabled.
test_env = make_env(data=df_test)

# The trailing semicolon keeps the returned History object from being echoed
# as the cell's output (it previously showed up as
# "<keras.callbacks.History at 0x...>").
agent.test(test_env, visualize=True);

Testing for 1 episodes ...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Episode 1: reward: -2.499, steps: 1000


<keras.callbacks.History at 0x242ff358>