# Models(5.18)
This tutorial demonstrates how to use simple policy gradient agents for portfolio management. Specifically, we look at different models and at how to inspect, visualize, and train them.

## Step 1: Import Packages

In [1]:
%load_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings("ignore")
import sys
from pathlib import Path
import os
import torch

# ROOT is the parent of the current working directory (dirname of abspath(".")).
# Appending it to sys.path is what lets the `trademaster` imports below resolve;
# this assumes the notebook is launched from a sub-directory of the repository
# root -- TODO confirm against the repo layout.
ROOT = os.path.dirname(os.path.abspath("."))
sys.path.append(ROOT)

import argparse
import os.path as osp
from mmcv import Config
from trademaster.utils import replace_cfg_vals
from trademaster.nets.builder import build_net
from trademaster.environments.builder import build_environment
from trademaster.datasets.builder import build_dataset
from trademaster.agents.builder import build_agent
from trademaster.optimizers.builder import build_optimizer
from trademaster.losses.builder import build_loss
from trademaster.trainers.builder import build_trainer
from trademaster.utils import plot
from trademaster.utils import set_seed
import matplotlib.pyplot as plt
# Fix the seed (2023) through trademaster's helper; presumably this seeds the
# RNGs used during training so runs are repeatable -- verify in trademaster.utils.
set_seed(2023)

2023-05-23 21:46:34,844	INFO services.py:1470 -- View the Ray dashboard at http://127.0.0.1:8265
2023-05-23 21:46:39,838	INFO worker.py:973 -- Calling ray.init() again after it has already been called.
2023-05-23 21:46:39,849	INFO worker.py:973 -- Calling ray.init() again after it has already been called.


## Take a look at Environment

In [2]:
from trademaster.environments.portfolio_management.environment import PortfolioManagementEnvironment

# Dataset description: where the sz50 splits live, which technical-indicator
# columns to use, and the trading parameters (starting cash, transaction cost).
cfg = {
    "data": {
        "type": "PortfolioManagementDataset",
        "data_path": "data/portfolio_management/sz50",
        "train_path": "data/portfolio_management/sz50/train.csv",
        "valid_path": "data/portfolio_management/sz50/valid.csv",
        "test_path": "data/portfolio_management/sz50/test.csv",
        "test_dynamic_path": "data/portfolio_management/sz50/test.csv",
        "tech_indicator_list": [
            "zopen", "zhigh", "zlow", "zadjcp", "zclose",
            "zd_5", "zd_10", "zd_15", "zd_20", "zd_25", "zd_30",
        ],
        "initial_amount": 100000,
        "transaction_cost_pct": 0.001,
    },
}

# Wrap the plain dict in an mmcv Config before handing it to the dataset builder.
cfg = Config(cfg)
dataset = build_dataset(cfg)

# The environment is constructed directly from a config that carries the dataset.
cfg2 = {"dataset": dataset}
env = PortfolioManagementEnvironment(cfg2)


## Play a single step with a random agent

In [3]:
from ray.rllib.agents.pg import PGTrainer
from ray.tune.registry import register_env
from ray.rllib.models import ModelCatalog
import ray

# Settings for the policy-gradient trainer. The "model" entry customises the
# network RLlib builds; LSTM-related keys (use_lstm, max_seq_len, lstm_cell_size,
# lstm_use_prev_reward, ...) may also be placed there.
trainer_cfg = {
    "rollout_fragment_length": 200,
    "framework": "torch",
    "model": {
        # Widths of the fully connected layers in the model's post-FC stack.
        "post_fcnet_hiddens": [30, 520, 321],
    },
    "env": "portfolio_management",
    "env_config": {"dataset": dataset, "task": "train", "device": "cpu"},
}

# Re-initialising Ray is tolerated so the cell can be re-run safely.
ray.init(ignore_reinit_error=True)

# Register the environment under the name referenced by the trainer config.
register_env(
    "portfolio_management",
    lambda env_config: PortfolioManagementEnvironment(env_config),
)
pg_trainer = PGTrainer(trainer_cfg, env="portfolio_management")

2023-05-23 21:46:55,213	INFO worker.py:973 -- Calling ray.init() again after it has already been called.
2023-05-23 21:46:55,285	INFO trainer.py:903 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


## Print the model
Note that this can only be done with the PyTorch version of the model.

In [4]:
# Grab the policy object held by the trainer and show it.
pol = pg_trainer.get_policy()
print(pol)

# Option 1: list every model parameter by name together with its shape.
for param_name, param_value in pol.get_weights().items():
    print(param_name, param_value.shape)

# Option 2: print the model object itself.
model = pol.model
print(model)

# Besides the layers configured above, the model also carries predefined
# components (flatten, one_hot, cnns); for details see
# 'ray.rllib.models.torch.complex_input_net.ComplexInputNetwork'.
for component in (model.flatten, model.one_hot, model.cnns):
    print(component)

# The concrete class tells you where to find the model's source code:
# 'ray.rllib.models.torch.complex_input_net.ComplexInputNetwork'.
print(type(model))

PGTorchPolicy
post_fc_stack._hidden_layers.0._model.0.weight (30, 256)
post_fc_stack._hidden_layers.0._model.0.bias (30,)
post_fc_stack._hidden_layers.1._model.0.weight (520, 30)
post_fc_stack._hidden_layers.1._model.0.bias (520,)
post_fc_stack._hidden_layers.2._model.0.weight (321, 520)
post_fc_stack._hidden_layers.2._model.0.bias (321,)
post_fc_stack._value_branch_separate.0._model.0.weight (30, 256)
post_fc_stack._value_branch_separate.0._model.0.bias (30,)
post_fc_stack._value_branch_separate.1._model.0.weight (520, 30)
post_fc_stack._value_branch_separate.1._model.0.bias (520,)
post_fc_stack._value_branch_separate.2._model.0.weight (321, 520)
post_fc_stack._value_branch_separate.2._model.0.bias (321,)
post_fc_stack._value_branch._model.0.weight (1, 321)
post_fc_stack._value_branch._model.0.bias (1,)
logits_layer._model.0.weight (102, 321)
logits_layer._model.0.bias (102,)
value_layer._model.0.weight (1, 321)
value_layer._model.0.bias (1,)
ComplexInputNetwork(
  (post_fc_stack): Fu

## Construct a sample batch and pass it into the model

In [5]:
# Reset the environment to obtain an initial observation and show its shape.
obs = env.reset()
print(obs.shape)

# Build a batch of two identical observations. The observation is converted to
# a float32 tensor once and then stacked, instead of passing torch.Tensor a
# Python list of arrays (the slow conversion path that PyTorch warns about).
# The resulting dtype and shape are the same as before.
obs_tensor = torch.as_tensor(obs, dtype=torch.float32)

# The model is called with three positional arguments; presumably these map to
# RLlib's (input_dict, state, seq_lens) -- confirm against the ModelV2 docs.
# No recurrent state or sequence lengths are supplied here.
sample_batch = (dict(obs=torch.stack([obs_tensor, obs_tensor])), None, None)
model_output = model(*sample_batch)
print(model_output, model_output[0].shape)

(11, 50)
(tensor([[-2.5922e-04,  3.7101e-04, -2.4454e-04, -1.4447e-04,  1.9354e-04,
          2.4718e-04, -5.6721e-04,  3.5246e-04,  4.6053e-05,  6.9738e-04,
          3.2152e-04, -1.9862e-04,  1.3373e-04,  7.9480e-05,  4.3697e-04,
          2.3909e-04,  1.6504e-04,  9.2378e-04, -1.7482e-04, -6.6044e-05,
          3.6818e-04,  1.5503e-04,  1.0421e-03, -2.6726e-06,  1.6905e-04,
          5.0499e-04,  1.5690e-04,  3.1352e-04, -2.2751e-05, -4.5401e-04,
          7.2833e-05,  5.3493e-04,  5.8630e-05, -5.0553e-04, -8.5027e-05,
          1.7867e-04, -3.0604e-05,  3.9327e-04, -3.9780e-04, -1.5325e-04,
         -2.6746e-05, -2.7179e-04, -8.5194e-04,  3.7674e-04, -3.5922e-04,
          3.2475e-04,  1.0711e-04,  6.9320e-04,  4.3957e-04,  4.9434e-05,
          6.1676e-04, -1.6000e-04,  4.2720e-05,  5.0777e-04,  5.1159e-04,
         -4.6186e-04, -2.8623e-04, -3.6844e-04,  8.1008e-04,  4.6258e-05,
          1.4441e-04,  1.7621e-04, -4.9282e-04, -4.1605e-04,  7.9114e-04,
          2.8525e-05, -5.577

In [6]:
from ray.rllib.models import MODEL_DEFAULTS, ModelCatalog

# Build a stand-alone model directly from RLlib's catalog using the default
# model config. framework="torch" is passed explicitly so the result is a
# PyTorch module; the output size is the length of the action vector.
model = ModelCatalog.get_model_v2(
    obs_space=env.observation_space,
    action_space=env.action_space,
    num_outputs=int(env.action_space.shape[0]),
    model_config=MODEL_DEFAULTS,
    framework="torch",
)
print(model)

ComplexInputNetwork(
  (post_fc_stack): FullyConnectedNetwork(
    (_hidden_layers): Sequential()
    (_value_branch_separate): Sequential()
    (_value_branch): SlimFC(
      (_model): Sequential(
        (0): Linear(in_features=256, out_features=1, bias=True)
      )
    )
  )
  (logits_layer): SlimFC(
    (_model): Sequential(
      (0): Linear(in_features=256, out_features=51, bias=True)
    )
  )
  (value_layer): SlimFC(
    (_model): Sequential(
      (0): Linear(in_features=256, out_features=1, bias=True)
    )
  )
)


## Let's get a more complex model and try to train!

In [7]:
# Trainer settings with RLlib's LSTM wrapper switched on.
trainer_cfg = {
    "rollout_fragment_length": 200,
    "framework": "torch",
    "model": {
        "use_lstm": True,
        # Max sequence length for training the LSTM (RLlib's default is 20).
        "max_seq_len": 1000,
        # Size of the LSTM cell.
        "lstm_cell_size": 64,
        # Feed a_{t-1} to the LSTM (one-hot encoded if discrete).
        "lstm_use_prev_action": True,
        # Do not feed r_{t-1} to the LSTM.
        "lstm_use_prev_reward": False,
    },
    "env": "portfolio_management",
    "env_config": {"dataset": dataset, "task": "train", "device": "cpu"},
}

# Re-initialising Ray is tolerated so the cell can be re-run safely.
ray.init(ignore_reinit_error=True)
pg_trainer2 = PGTrainer(trainer_cfg, env="portfolio_management")

# Inspect the resulting model: its class, its full structure, and the
# flatten sub-networks.
pol = pg_trainer2.get_policy()
model = pol.model
for item in (type(model), model, model.flatten):
    print(item)

2023-05-23 21:46:57,646	INFO worker.py:973 -- Calling ray.init() again after it has already been called.


<class 'ray.rllib.models.catalog.ComplexInputNetwork_as_LSTMWrapper'>
ComplexInputNetwork_as_LSTMWrapper(
  (post_fc_stack): FullyConnectedNetwork(
    (_hidden_layers): Sequential()
    (_value_branch_separate): Sequential()
    (_value_branch): SlimFC(
      (_model): Sequential(
        (0): Linear(in_features=256, out_features=1, bias=True)
      )
    )
  )
  (lstm): LSTM(307, 64, batch_first=True)
  (_logits_branch): SlimFC(
    (_model): Sequential(
      (0): Linear(in_features=64, out_features=102, bias=True)
    )
  )
  (_value_branch): SlimFC(
    (_model): Sequential(
      (0): Linear(in_features=64, out_features=1, bias=True)
    )
  )
)
{0: FullyConnectedNetwork(
  (_hidden_layers): Sequential(
    (0): SlimFC(
      (_model): Sequential(
        (0): Linear(in_features=550, out_features=256, bias=True)
        (1): Tanh()
      )
    )
    (1): SlimFC(
      (_model): Sequential(
        (0): Linear(in_features=256, out_features=256, bias=True)
        (1): Tanh()
     

In [35]:
# Run 30 training iterations on the LSTM-backed trainer.
for epoch in range(30):
    pg_trainer2.train()

+--------------+-------------+------------+--------------+
| Total Return | Sharp Ratio | Volatility | Max Drawdown |
+--------------+-------------+------------+--------------+
| -84.527355%  |  -0.903878  | 1.263258%  |  88.296971%  |
+--------------+-------------+------------+--------------+
+--------------+-------------+------------+--------------+
| Total Return | Sharp Ratio | Volatility | Max Drawdown |
+--------------+-------------+------------+--------------+
| -89.076497%  |  -1.104029  | 1.250717%  |  91.691385%  |
+--------------+-------------+------------+--------------+


## Save the model

In [36]:
from trademaster.utils import get_attr, save_object, load_object

# Model-level alternative (weights only):
#   torch.save(model.state_dict(), "model.pkl")
#   model.load_state_dict(torch.load("model.pkl"))

# Trainer-level round trip: serialise the whole trainer to an object, write it
# to disk, read it back, and restore the trainer from the loaded object.
trainer_ckpt_path = "pg_trainer2.pkl"
obj = pg_trainer2.save_to_object()
save_object(obj, trainer_ckpt_path)
obj2 = load_object(trainer_ckpt_path)
pg_trainer2.restore_from_object(obj2)


2023-05-23 22:00:02,353	INFO trainable.py:588 -- Restored on 127.0.0.1 from checkpoint: /Users/louison/ray_results/PGTrainer_portfolio_management_2023-05-23_21-46-575xokja1h/tmpxwbqvldkrestore_from_object/checkpoint-186
2023-05-23 22:00:02,354	INFO trainable.py:597 -- Current state after restoring: {'_iteration': 186, '_timesteps_total': None, '_time_total': 176.66166019439697, '_episodes_total': 15}


In [38]:

# Show the trainer's weights (a dict keyed by policy id, per the recorded output).
print(pg_trainer2.get_weights())

{'default_policy': {'post_fc_stack._value_branch._model.0.weight': array([[ 6.41568404e-05, -2.51063902e-04, -3.25971458e-04,
         1.91634346e-04,  1.09232118e-04,  3.13350436e-04,
         2.96070561e-04,  5.85079775e-04, -2.92220793e-04,
        -3.96755582e-04, -6.15960860e-04, -5.65996917e-04,
         7.21045886e-04, -2.56219180e-04,  5.51059376e-04,
         1.62565833e-04, -2.35275191e-04, -5.10079881e-05,
        -8.52191704e-04, -1.43573052e-04,  9.59248282e-04,
         1.48139632e-04, -1.20068411e-03,  7.11641333e-04,
        -6.40437211e-05, -3.20654763e-05, -5.31546131e-04,
        -4.12736350e-04,  2.87268194e-04, -8.39275774e-04,
         2.91206321e-04, -5.56935156e-05, -8.24446674e-04,
        -2.42925293e-04,  6.03552093e-04,  6.91078429e-04,
        -6.10015937e-04,  2.36018837e-04, -1.80255694e-04,
        -4.48258885e-04, -1.08949069e-04,  2.01994859e-04,
        -3.09574360e-04, -5.24486764e-04, -2.68371194e-04,
        -6.65511761e-05, -1.03700768e-04, -3.582

In [34]:

# Model-level cross-check: print the first parameter tensor of the policy's
# torch model so it can be compared with the trainer-level get_weights() dump.
# The recorded output of this cell ("Parameter containing: tensor(...)") comes
# from this statement; a second get_weights() dump would only repeat the
# previous cell.
print(next(pg_trainer2.get_policy().model.parameters()))

Parameter containing:
tensor([[ 6.4157e-05, -2.5106e-04, -3.2597e-04,  1.9163e-04,  1.0923e-04,
          3.1335e-04,  2.9607e-04,  5.8508e-04, -2.9222e-04, -3.9676e-04,
         -6.1596e-04, -5.6600e-04,  7.2105e-04, -2.5622e-04,  5.5106e-04,
          1.6257e-04, -2.3528e-04, -5.1008e-05, -8.5219e-04, -1.4357e-04,
          9.5925e-04,  1.4814e-04, -1.2007e-03,  7.1164e-04, -6.4044e-05,
         -3.2065e-05, -5.3155e-04, -4.1274e-04,  2.8727e-04, -8.3928e-04,
          2.9121e-04, -5.5694e-05, -8.2445e-04, -2.4293e-04,  6.0355e-04,
          6.9108e-04, -6.1002e-04,  2.3602e-04, -1.8026e-04, -4.4826e-04,
         -1.0895e-04,  2.0199e-04, -3.0957e-04, -5.2449e-04, -2.6837e-04,
         -6.6551e-05, -1.0370e-04, -3.5825e-04, -1.1332e-03, -1.0718e-03,
         -2.1757e-04,  1.2615e-03, -1.3245e-04, -7.0127e-04, -3.0714e-04,
         -4.1893e-04,  1.3233e-04,  3.0927e-04,  9.9125e-04,  5.5904e-04,
          8.0387e-05, -3.8817e-04, -3.0286e-04, -4.6688e-04,  2.9431e-04,
         -5.0100

## How to define your personal model