# Environment(5.25)
This tutorial is to demonstrate to use environment for portfolio management

## Step1: Import Packages

In [1]:
%load_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings("ignore")
import sys
from pathlib import Path
import os
import torch

ROOT = os.path.dirname(os.path.abspath("."))
sys.path.append(ROOT)

import argparse
import os.path as osp
from mmcv import Config
from trademaster.utils import replace_cfg_vals
from trademaster.nets.builder import build_net
from trademaster.environments.builder import build_environment
from trademaster.datasets.builder import build_dataset
from trademaster.agents.builder import build_agent
from trademaster.optimizers.builder import build_optimizer
from trademaster.losses.builder import build_loss
from trademaster.trainers.builder import build_trainer
from trademaster.utils import plot
from trademaster.utils import set_seed
import matplotlib.pyplot as plt
set_seed(2023)

2023-05-26 14:13:42,225	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
2023-05-26 14:13:47,509	INFO worker.py:973 -- Calling ray.init() again after it has already been called.
2023-05-26 14:13:47,514	INFO worker.py:973 -- Calling ray.init() again after it has already been called.


## Take a look at Environment

In [2]:
from trademaster.environments.portfolio_management.real_environment import PortfolioManagementRealEnvironment
def print_env(env):
    for k, v in vars(env).items():
        print(k)
        print(v)
        print("       ")
cfg = dict(
    data = dict(
        type = "PortfolioManagementDataset",
        data_path = "data/portfolio_management/sz50",
        train_path = "data/portfolio_management/sz50/train.csv",
        valid_path = "data/portfolio_management/sz50/valid.csv",
        test_path = "data/portfolio_management/sz50/test.csv",
        test_dynamic_path='data/portfolio_management/sz50/test.csv',
        tech_indicator_list = [
            "zopen",
            "zhigh",
            "zlow",
            "zadjcp",
            "zclose",
            "zd_5",
            "zd_10",
            "zd_15",
            "zd_20",
            "zd_25",
            "zd_30"
        ],
        initial_amount = 100000,
        transaction_cost_pct = 0.001
    )
)
cfg=Config(cfg)
dataset = build_dataset(cfg)
cfg2 = dict(dataset=dataset)
env = PortfolioManagementRealEnvironment(cfg2)
print_env(env)
print(env.df.tic.unique())

dataset
<trademaster.datasets.portfolio_management.dataset.PortfolioManagementDataset object at 0x15c514eb0>
       
task
train
       
day
0
       
df_path
/Users/louison/PersonalTrade/data/portfolio_management/sz50/train.csv
       
initial_amount
100000
       
transaction_cost_pct
0.001
       
tech_indicator_list
['zopen', 'zhigh', 'zlow', 'zadjcp', 'zclose', 'zd_5', 'zd_10', 'zd_15', 'zd_20', 'zd_25', 'zd_30']
       
df
       index        date          tic   open  close   high    low   
0      99213  2009-01-06  600010.XSHG   0.93   0.97   0.98   0.92  \
0      96295  2009-01-06  600028.XSHG   2.82   2.92   2.93   2.80   
0      70033  2009-01-06  600030.XSHG   8.77   9.09   9.17   8.75   
0       8755  2009-01-06  600031.XSHG   2.57   2.83   2.83   2.56   
0       5837  2009-01-06  600036.XSHG   5.73   5.93   5.97   5.69   
...      ...         ...          ...    ...    ...    ...    ...   
2333   43186  2018-08-08  603259.XSHG  31.75  30.51  31.89  30.37   
2333   69448  20

## Play a single step with a random agent

In [3]:
import numpy as np
def agent(action_size):
    def softmax(x):
        """Compute softmax values for each sets of scores in x."""
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum()
    return softmax(np.random.rand(action_size))
action = (agent(env.action_space_shape))
obs, reward, done, _ = env.step(action)
print(obs.shape)
print(obs)
print(obs.shape)
print_env(env)

(1, 11, 50)
[[[ 2.10526316e-02  1.74825175e-02  7.77777778e-03  1.38408304e-02
    1.20068611e-02  1.50943396e-02 -3.84615385e-03 -2.28571429e-02
    1.14006515e-02 -3.36700337e-03  0.00000000e+00 -1.23456790e-02
    7.28155340e-03 -3.03951368e-02  1.99938004e-02  1.24223602e-02
   -8.84955752e-03 -1.05263158e-02  2.18579235e-02 -3.70370370e-02
    1.42857143e-02 -5.69800570e-03  0.00000000e+00 -6.00375235e-02
    0.00000000e+00 -3.91280045e-03 -3.91280045e-03 -4.27533134e-03
    2.36220472e-02  5.91715976e-03  4.72440945e-03 -1.79012346e-02
   -3.47222222e-03  2.25988701e-02 -2.51716247e-02 -6.32111252e-04
   -2.69241229e-02 -1.61333692e-02 -6.60792952e-03 -2.18805441e-02
    1.23966942e-02 -5.10204082e-03  1.59362550e-02 -1.35823430e-02
   -1.80790960e-02 -5.65107290e-02 -2.13491209e-02 -7.66782407e-03
   -9.56685057e-03 -9.19275124e-02]
  [ 3.15789474e-02  2.09790210e-02  2.22222222e-02  2.76816609e-02
    4.11663808e-02  3.01886792e-02  1.92307692e-02  1.71428571e-02
    3.09446254

## Play multiple steps with the random agent

In [None]:
env.reset()
while True:
    obs, reward, done, _ = env.step(agent(env.action_space_shape))
    print("        ")
    print(env.date_memory[-1])
    print(env.asset_memory[-1])
    if done:
        print_env(env)
        break
# plot the result
from datetime import datetime
date_objects = [datetime.strptime(date_str, '%Y-%m-%d') for date_str in env.date_memory]
plt.plot(date_objects, env.asset_memory)
plt.show()


(1, 11, 50)
        
2009-01-07
99748.83257804734
(2, 11, 50)
        
2009-01-08
98358.52068735348
(3, 11, 50)
        
2009-01-09
99709.93231682738
(4, 11, 50)
        
2009-01-12
100114.78261917605
(5, 11, 50)
        
2009-01-13
98637.5289064303
(6, 11, 50)
        
2009-01-14
101706.09746188065
(7, 11, 50)
        
2009-01-15
101828.4978774211
(8, 11, 50)
        
2009-01-16
103317.2497385758
(9, 11, 50)
        
2009-01-19
103570.4624806396
(10, 11, 50)
        
2009-01-20
104539.79785316518
(11, 11, 50)
        
2009-01-21
104313.35766479319
(12, 11, 50)
        
2009-01-22
105281.43737468997
(13, 11, 50)
        
2009-01-23
105131.29226446392
(14, 11, 50)
        
2009-02-02
106470.34192609902
(15, 11, 50)
        
2009-02-03
108405.1422271832
(16, 11, 50)
        
2009-02-04
110035.6836286985
(17, 11, 50)
        
2009-02-05
109432.36243965762
(18, 11, 50)
        
2009-02-06
112038.7470987225
(19, 11, 50)
        
2009-02-09
114106.27258779033
(20, 11, 50)
        
2009-02-10

KeyboardInterrupt: 

# How to define your personal model (Different levels of customizability)

## Customize by passing model_config

In [4]:
from ray.rllib.models import ModelCatalog
from ray.rllib.models import MODEL_DEFAULTS
model = ModelCatalog.get_model_v2(env.observation_space, env.action_space, int(env.action_space.shape[0]), model_config=MODEL_DEFAULTS, framework="torch")
print(model)

ComplexInputNetwork(
  (post_fc_stack): FullyConnectedNetwork(
    (_hidden_layers): Sequential()
    (_value_branch_separate): Sequential()
    (_value_branch): SlimFC(
      (_model): Sequential(
        (0): Linear(in_features=256, out_features=1, bias=True)
      )
    )
  )
  (logits_layer): SlimFC(
    (_model): Sequential(
      (0): Linear(in_features=256, out_features=51, bias=True)
    )
  )
  (value_layer): SlimFC(
    (_model): Sequential(
      (0): Linear(in_features=256, out_features=1, bias=True)
    )
  )
)


## Customize by defining your own class

#### First let's get familiar with the API

In [5]:
from ray.rllib.models.torch.complex_input_net import ComplexInputNetwork
ModelCatalog.register_custom_model(model_name="cust_model", model_class=ComplexInputNetwork)
model2 = ModelCatalog.get_model_v2(env.observation_space, env.action_space, int(env.action_space.shape[0]), model_config=MODEL_DEFAULTS, framework="torch", name="cust_model")
print(model2)

ComplexInputNetwork(
  (post_fc_stack): FullyConnectedNetwork(
    (_hidden_layers): Sequential()
    (_value_branch_separate): Sequential()
    (_value_branch): SlimFC(
      (_model): Sequential(
        (0): Linear(in_features=256, out_features=1, bias=True)
      )
    )
  )
  (logits_layer): SlimFC(
    (_model): Sequential(
      (0): Linear(in_features=256, out_features=51, bias=True)
    )
  )
  (value_layer): SlimFC(
    (_model): Sequential(
      (0): Linear(in_features=256, out_features=1, bias=True)
    )
  )
)


#### We can then start to build our own class of Network
The follow code are modified from this class:
ray.rllib.models.torch.complex_input_net.ComplexInputNetwork

In [100]:

from gym.spaces import Box, Discrete, MultiDiscrete
import numpy as np
import tree  # pip install dm_tree

# TODO (sven): add IMPALA-style option.
# from ray.rllib.examples.models.impala_vision_nets import TorchImpalaVisionNet
from ray.rllib.models.torch.misc import (
    normc_initializer as torch_normc_initializer,
    SlimFC,
)
from ray.rllib.models.catalog import ModelCatalog
from ray.rllib.models.modelv2 import ModelV2, restore_original_dimensions
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.utils import get_filter_config
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_torch
from ray.rllib.utils.spaces.space_utils import flatten_space
from ray.rllib.utils.torch_utils import one_hot
from einops import rearrange

torch, nn = try_import_torch()
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


class Stok(TorchModelV2, nn.Module):
    """TorchModelV2 concat'ing CNN outputs to flat input(s), followed by FC(s).

    Note: This model should be used for complex (Dict or Tuple) observation
    spaces that have one or more image components.

    The data flow is as follows:

    `obs` (e.g. Tuple[img0, img1, discrete0]) -> `CNN0 + CNN1 + ONE-HOT`
    `CNN0 + CNN1 + ONE-HOT` -> concat all flat outputs -> `out`
    `out` -> (optional) FC-stack -> `out2`
    `out2` -> action (logits) and vaulue heads.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        # input shape of (B, L, H, W)
        # lstm input shape of (B*W, L, H)
        # lstm output shape of (B*W, L, H1)
        # takes the last of lstm (B*W, H1)
        # reshape to (B, H1, W)
        # pass to whatever is after
        self.original_space = (
            obs_space.original_space
            if hasattr(obs_space, "original_space")
            else obs_space
        )


        nn.Module.__init__(self)
        TorchModelV2.__init__(
            self, self.original_space, action_space, num_outputs, model_config, name
        )
        self.lstm_cell_size = 512

        self.flattened_input_space = flatten_space(self.original_space)
        self.lstm = torch.nn.LSTM(self.flattened_input_space[0].shape[-2], self.lstm_cell_size, 3)

        # size = int(np.product(self.flattened_input_space[0].shape))
        size = int((self.flattened_input_space[0].shape[-1]*self.lstm_cell_size))
        # Optional post-concat FC-stack.
        post_fc_stack_config = {
            "fcnet_hiddens": model_config.get("post_fcnet_hiddens", []),
            "fcnet_activation": model_config.get("post_fcnet_activation", "relu"),
        }
        self.post_fc_stack = ModelCatalog.get_model_v2(
            Box(float("-inf"), float("inf"), shape=(size,), dtype=np.float32),
            self.action_space,
            None,
            post_fc_stack_config,
            framework="torch",
            name="post_fc_stack",
        )

        # Actions and value heads.
        self.logits_layer = None
        self.value_layer = None
        self._value_out = None

        if num_outputs:
            # Action-distribution head.
            self.logits_layer = SlimFC(
                in_size=self.post_fc_stack.num_outputs,
                out_size=num_outputs,
                activation_fn=None,
                initializer=torch_normc_initializer(0.01),
            )
            # Create the value branch model.
            self.value_layer = SlimFC(
                in_size=self.post_fc_stack.num_outputs,
                out_size=1,
                activation_fn=None,
                initializer=torch_normc_initializer(0.01),
            )
        else:
            self.num_outputs = size

    @override(ModelV2)
    def forward(self, input_dict, state, seq_lens):
        # input shape of (B, L, H, W)
        # lstm input shape of (B*W, L, H)
        # lstm output shape of (B*W, L, H1)
        # takes the last of lstm (B*W, H1)
        # reshape to (B, H1, W)
        # pass to whatever is after
        H1 = self.lstm_cell_size
        # print(input_dict[SampleBatch.OBS].shape)
        ori_input = input_dict[SampleBatch.OBS]
        if(len(ori_input.shape) == 3):
            ori_input = ori_input.unsqueeze(0)
        elif len(ori_input.shape) == 1:
            ori_input = [torch.Tensor(ori).flip(0) for ori in ori_input]
            ori_input = torch.Tensor(torch.nn.utils.rnn.pad_sequence(ori_input, batch_first=True)).flip(1)

        B, L, H, W = ori_input.shape
        print(ori_input.shape)
        lstm_input = rearrange(ori_input, "b l h w -> b w l h").reshape(B*W, L, H)
        lstm_out_last = self.lstm(lstm_input)[0][:, -1, :].squeeze(-2)
        mod_input = rearrange(lstm_out_last.view(B, W, H1), "b w h -> b h w")


        if SampleBatch.OBS in input_dict and "obs_flat" in input_dict:
            orig_obs = input_dict[SampleBatch.OBS]
        else:
            orig_obs = restore_original_dimensions(
                input_dict[SampleBatch.OBS], self.processed_obs_space, tensorlib="torch"
            )
        # Push observations through the different components
        # (CNNs, one-hot + FC, etc..).
        outs = []
        # outs.append(tree.flatten(orig_obs)[0])
        outs.append(tree.flatten(mod_input)[0])

        # Concat all outputs and the non-image inputs.
        out = torch.cat(outs, dim=1)
        # Push through (optional) FC-stack (this may be an empty stack).
        out, _ = self.post_fc_stack(SampleBatch({SampleBatch.OBS: out}))

        # No logits/value branches.
        if self.logits_layer is None:
            return out, []

        # Logits- and value branches.
        logits, values = self.logits_layer(out), self.value_layer(out)
        self._value_out = torch.reshape(values, [-1])
        return logits, []

    @override(ModelV2)
    def value_function(self):
        return self._value_out


In [101]:
# model2 = Stok(env.observation_space, env.action_space, int(env.action_space.shape[0]), model_config=MODEL_DEFAULTS, name="fuck")
# print(model2)
from ray.rllib.agents.pg import PGTrainer 
from ray.tune.registry import register_env
import ray
ModelCatalog.register_custom_model(model_name="cust_model", model_class=Stok)
trainer_cfg = dict(
    rollout_fragment_length = 5,
    train_batch_size = 5, 
    # explore = True,
    framework = "torch",
    disable_env_checking = True,
    model = {
    #     # use_attention = True,
    #     "use_lstm": True,
    #     # Max seq len for training the LSTM, defaults to 20.
        # "max_seq_len": 5,
    #     # Size of the LSTM cell.
    #     "lstm_cell_size": 1000,
    #     # Whether to feed a_{t-1} to LSTM (one-hot encoded if discrete).
    #     # "lstm_use_prev_action": True,
    #     # Whether to feed r_{t-1} to LSTM.
    #     "lstm_use_prev_reward": False,
    #     # Whether the LSTM is time-major (TxBx..) or batch-major (BxTx..).
        "custom_model": "cust_model", 
    }
    
)
ray.init(ignore_reinit_error=True)
trainer_cfg["env"] = "portfolio_real_management"
trainer_cfg["env_config"] = dict(dataset=dataset, task="train", device="cpu")
register_env("portfolio_real_management", lambda config: PortfolioManagementRealEnvironment(config))
pg_trainer = PGTrainer(trainer_cfg, env="portfolio_real_management")

2023-05-26 16:44:55,302	INFO worker.py:973 -- Calling ray.init() again after it has already been called.


torch.Size([1, 32, 11, 50])
tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        ...,

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0.,



In [103]:
for e in range(30):
    print(e)
    pg_trainer.train()

0
torch.Size([1, 15, 11, 50])
torch.Size([1, 16, 11, 50])


KeyboardInterrupt: 