In [1]:
import os
import sys
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
%matplotlib inline
sns.set_theme()

In [2]:
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

In [3]:
from src.strategy.model import Model
from src.strategy.environment import Environment
from src.strategy.agent import Agent
from src.strategy.buffer import Buffer
from src.utils import get_config, read_file
config = get_config.read_yaml()

In [4]:
print("Starting Training...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Starting Training...
Using device: cuda


In [5]:
MODEL_PATH = config['paths']['model_directory']
CAPITAL = config['strategy']['capital']
SYMBOLS = config['data']['symbols']

hp = config['hyperparameters']
NUM_ASSETS = hp['num_assets']
INPUT_DIM = hp['input_dim']
ACTION_DIM = hp['action_dim']
NUM_LSTM_LAYERS = hp['num_lstm_layers']
HIDDEN_STATE_DIM = hp['hidden_state_dim']
ACTOR_HIDDEN_DIM = hp['actor_hidden_dim']
CRITIC_HIDDEN_DIM = hp['critic_hidden_dim']
GAMMA = hp['gamma']
GAE_LAMBDA = hp['gae_lambda']
CLIP_EPSILON = hp['clip_epsilon']
VALUE_LOSS_COEF = hp['value_loss_coef']
ENTROPY_LOSS_COEF = hp['entropy_loss_coef']
LEARNING_RATE = hp['learning_rate']
NUM_EPOCHS = hp['num_epochs']
ROLLOUT_SIZE = hp['rollout_size']
MINI_BATCH_SIZE = hp['mini_batch_size']
SEQUENCE_LENGTH = hp['seq_len']
train_data_norm = read_file.read_merged_training_data()
train_data_unnorm = read_file.read_merged_training_data(False)
print(f'MODEL_PATH: {MODEL_PATH}')
print(f'CAPITAL: {CAPITAL}')
print(f'SYMBOLS: {SYMBOLS}')
print(f'NUM_ASSETS: {NUM_ASSETS}')
print(f'INPUT_DIM: {INPUT_DIM}')
print(f'ACTION_DIM: {ACTION_DIM}')
print(f'NUM_LSTM_LAYERS: {NUM_LSTM_LAYERS}')
print(f'HIDDEN_STATE_DIM: {HIDDEN_STATE_DIM}')
print(f'ACTOR_HIDDEN_DIM: {ACTOR_HIDDEN_DIM}')
print(f'CRITIC_HIDDEN_DIM: {CRITIC_HIDDEN_DIM}')
print(f'GAMMA: {GAMMA}')
print(f'GAE_LAMBDA: {GAE_LAMBDA}')
print(f'CLIP_EPSILON: {CLIP_EPSILON}')
print(f'VALUE_LOSS_COEF: {VALUE_LOSS_COEF}')
print(f'ENTROPY_LOSS_COEF: {ENTROPY_LOSS_COEF}')
print(f'LEARNING_RATE: {LEARNING_RATE}')
print(f'NUM_EPOCHS: {NUM_EPOCHS}')
print(f'ROLLOUT_SIZE: {ROLLOUT_SIZE}')
print(f'BATCH_SIZE: {MINI_BATCH_SIZE}')
print(f'SEQUENCE_LENGTH: {SEQUENCE_LENGTH}')
print(train_data_norm)
print(train_data_unnorm)

MODEL_PATH: models/
CAPITAL: 100000
SYMBOLS: ['ETH/USDT', 'BTC/USDT', 'BNB/USDT', 'SOL/USDT', 'ADA/USDT', 'LINK/USDT', 'XRP/USDT', 'LTC/USDT', 'XLM/USDT']
NUM_ASSETS: 10
INPUT_DIM: 126
ACTION_DIM: 10
NUM_LSTM_LAYERS: 1
HIDDEN_STATE_DIM: 256
ACTOR_HIDDEN_DIM: 64
CRITIC_HIDDEN_DIM: 64
GAMMA: 0.99
GAE_LAMBDA: 0.95
CLIP_EPSILON: 0.2
VALUE_LOSS_COEF: 0.5
ENTROPY_LOSS_COEF: 0.01
LEARNING_RATE: 0.001
NUM_EPOCHS: 10
ROLLOUT_SIZE: 4096
BATCH_SIZE: 128
SEQUENCE_LENGTH: 72
                     ('open', 'ETH')  ('high', 'ETH')  ('low', 'ETH')  \
timestamp                                                               
2020-10-01 00:00:00         1.000000         0.962171        0.986505   
2020-10-01 01:00:00         0.922741         1.000000        0.971660   
2020-10-01 02:00:00         1.000000         1.000000        1.000000   
2020-10-01 03:00:00         1.000000         0.932530        1.000000   
2020-10-01 04:00:00         0.970954         1.000000        1.000000   
...                   

In [6]:
model = Model(INPUT_DIM,
              HIDDEN_STATE_DIM,
              NUM_ASSETS,
              NUM_LSTM_LAYERS,
              ACTOR_HIDDEN_DIM,
              CRITIC_HIDDEN_DIM,)
model

Model(
  (lstm): LSTM(126, 256, batch_first=True)
  (actor_head): Sequential(
    (0): Linear(in_features=256, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=20, bias=True)
  )
  (critic_head): Sequential(
    (0): Linear(in_features=256, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [7]:
agent = Agent(model,
              GAMMA,
              GAE_LAMBDA,
              CLIP_EPSILON,
              VALUE_LOSS_COEF,
              ENTROPY_LOSS_COEF,
              LEARNING_RATE,
              MINI_BATCH_SIZE,
              device,
              MODEL_PATH)
agent

<src.strategy.agent.Agent at 0x232f04f6a50>

In [8]:
env = Environment(train_data_unnorm,
                  SEQUENCE_LENGTH,
                  NUM_ASSETS,
                  SYMBOLS,
                  CAPITAL)
env

<src.strategy.environment.Environment at 0x23299286cf0>

In [9]:
env.reset(0)

In [10]:
buffer = Buffer(ROLLOUT_SIZE,
                SEQUENCE_LENGTH,
                INPUT_DIM,
                ACTION_DIM,
                device)
buffer.display()

--- Buffer Contents (all on CPU) ---
states: torch.Size([4097, 72, 126])
actions: torch.Size([4097, 10])
log_probs: torch.Size([4097])
values: torch.Size([4097])
rewards: torch.Size([4097])
dones: torch.Size([4097])
advantages: torch.Size([4097])
returns: torch.Size([4097])
current_step: 0 / 4097


In [11]:
train_data_norm.iloc[0:SEQUENCE_LENGTH]

Unnamed: 0_level_0,"('open', 'ETH')","('high', 'ETH')","('low', 'ETH')","('close', 'ETH')","('volume', 'ETH')","('rsi', 'ETH')","('sma-50', 'ETH')","('sma-100', 'ETH')","('sma-200', 'ETH')","('ema-50', 'ETH')",...,"('volume', 'XLM')","('rsi', 'XLM')","('sma-50', 'XLM')","('sma-100', 'XLM')","('sma-200', 'XLM')","('ema-50', 'XLM')","('ema-100', 'XLM')","('ema-200', 'XLM')","('atr', 'XLM')","('adx', 'XLM')"
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-10-01 00:00:00,1.000000,0.962171,0.986505,0.930535,0.618368,0.904891,0.000000,1.0,1.000000,0.887498,...,0.215989,1.000000,0.004091,0.141368,0.999997,0.466309,0.580954,0.298796,0.257085,0.325054
2020-10-01 01:00:00,0.922741,1.000000,0.971660,1.000000,0.374792,1.000000,0.073192,1.0,1.000000,1.000000,...,0.002807,1.000000,0.061674,0.203590,0.999997,0.638343,0.796008,0.415248,0.064341,0.406695
2020-10-01 02:00:00,1.000000,1.000000,1.000000,1.000000,0.968396,1.000000,0.151500,1.0,1.000000,1.000000,...,0.647335,1.000000,0.121460,0.268277,0.999998,0.869845,0.999995,0.577327,0.249065,0.519852
2020-10-01 03:00:00,1.000000,0.932530,1.000000,0.971104,0.200648,0.953461,0.235908,1.0,1.000000,1.000000,...,0.615060,0.576580,0.153241,0.303039,0.999998,0.982428,0.999995,0.659771,0.499462,0.568566
2020-10-01 04:00:00,0.970954,1.000000,1.000000,0.969040,0.453046,0.949955,0.318741,1.0,1.000000,1.000000,...,0.200664,0.534733,0.182190,0.325396,0.999998,0.999998,0.999996,0.722614,0.372141,0.685574
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-10-03 19:00:00,1.000000,0.988728,1.000000,0.973526,0.054804,0.955075,0.009568,0.0,1.000000,0.049734,...,0.224620,1.000000,0.001139,0.000000,0.991348,0.014990,0.000000,0.000000,0.024520,0.000000
2020-10-03 20:00:00,0.972256,0.927536,0.831776,0.850782,0.052612,0.748864,0.002156,0.0,1.000000,0.065932,...,0.038163,0.721530,0.000000,0.000000,0.957459,0.015793,0.000000,0.000000,0.011186,0.000000
2020-10-03 21:00:00,0.851628,0.752013,0.828271,0.827918,0.000000,0.708317,0.000000,0.0,1.000000,0.080520,...,0.002499,0.732916,0.000000,0.000000,0.950238,0.017282,0.000000,0.000000,0.000000,0.000000
2020-10-03 22:00:00,0.828709,0.677939,0.572430,0.664260,0.757833,0.443429,0.000000,0.0,1.000000,0.077380,...,0.089355,0.557634,0.000000,0.000000,0.918530,0.006948,0.000000,0.000000,0.000000,0.000000


In [12]:
buffer.store_state(train_data_norm.iloc[0:SEQUENCE_LENGTH].values)
buffer.states

tensor([[[1.0000, 0.9622, 0.9865,  ..., 0.2988, 0.2571, 0.3251],
         [0.9227, 1.0000, 0.9717,  ..., 0.4152, 0.0643, 0.4067],
         [1.0000, 1.0000, 1.0000,  ..., 0.5773, 0.2491, 0.5199],
         ...,
         [0.8516, 0.7520, 0.8283,  ..., 0.0000, 0.0000, 0.0000],
         [0.8287, 0.6779, 0.5724,  ..., 0.0000, 0.0000, 0.0000],
         [0.6659, 0.4928, 0.4977,  ..., 0.0000, 0.0122, 0.0000]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.

In [13]:
for step in tqdm(range(ROLLOUT_SIZE)):
    buffer = agent.get_action_and_value(buffer)
    timestep, buffer = env.step(buffer.actions[buffer.current_step_action - 1], buffer)
# buffer = agent.compute_gae(buffer=buffer, next_value=torch.tensor([0.0]), next_done=torch.tensor([0.0]))
buffer.actions

  0%|          | 0/4096 [00:00<?, ?it/s]

tensor([[ 1.4846,  1.4093, -0.3473, -2.3682,  0.1216,  1.6181, -1.3213, -0.1570,
          0.6063, -0.7880]], device='cuda:0')

In [15]:
buffer.actions

tensor([[ 1.4846,  1.4093, -0.3473, -2.3682,  0.1216,  1.6181, -1.3213, -0.1570,
          0.6063, -0.7880]], device='cuda:0')