# PPO for Portfolio Management
This tutorial demonstrates how to use PPO (Proximal Policy Optimization) for portfolio management.

## Step1: Import Packages

In [1]:

import warnings
# Silence every Python warning for the rest of the session (no category or module filter is given).
warnings.filterwarnings("ignore")
import sys
from pathlib import Path
import os
import torch

# Parent of the current working directory; used below as the project root for imports and config paths.
ROOT = os.path.dirname(os.path.abspath("."))
# Guard the append so re-running this cell does not keep adding duplicate entries to sys.path.
if ROOT not in sys.path:
    sys.path.append(ROOT)

import argparse
import os.path as osp
from mmcv import Config
from trademaster.utils import replace_cfg_vals
from trademaster.nets.builder import build_net
from trademaster.environments.builder import build_environment
from trademaster.datasets.builder import build_dataset
from trademaster.agents.builder import build_agent
from trademaster.optimizers.builder import build_optimizer
from trademaster.losses.builder import build_loss
from trademaster.trainers.builder import build_trainer
from trademaster.utils import plot
from trademaster.utils import set_seed
# NOTE(review): presumably seeds the RNGs used by trademaster so runs are repeatable — confirm in trademaster.utils.set_seed.
set_seed(2023)

2023-05-11 18:16:02,932	INFO worker.py:973 -- Calling ray.init() again after it has already been called.


## Step2: Import Configs

In [11]:
# Options for this tutorial. parse_known_args() is used so that unrecognised
# arguments (e.g. anything on the notebook kernel's own command line) are
# returned separately instead of making argparse exit with an error.
parser = argparse.ArgumentParser(description='PPO portfolio management tutorial')
# Alternative configs that can be used as the --config default instead:
#   osp.join(ROOT, "configs", "portfolio_management", "portfolio_management_sse500_sarl_sarl_adam_mse.py")
#   osp.join(ROOT, "configs", "portfolio_management", "portfolio_management_exchange_ppo_ppo_adam_mse.py")
parser.add_argument("--config", default=osp.join(ROOT, "configs", "portfolio_management", "portfolio_management_sz50_ppo_ppo_adam_mse.py"),
                    help="path to the experiment config file")
parser.add_argument("--task_name", type=str, default="train")
args, _ = parser.parse_known_args()

# Load the config file selected above and run it through replace_cfg_vals
# before anything else reads it; the result is printed for inspection.
task_name = args.task_name
cfg = replace_cfg_vals(Config.fromfile(args.config))
print(cfg)

Config (path: /Users/louison/PersonalTrade/configs/portfolio_management/portfolio_management_sz50_ppo_ppo_adam_mse.py): {'data': {'type': 'PortfolioManagementDataset', 'data_path': 'data/portfolio_management/sz50', 'train_path': 'data/portfolio_management/sz50/train.csv', 'valid_path': 'data/portfolio_management/sz50/valid.csv', 'test_path': 'data/portfolio_management/sz50/test.csv', 'tech_indicator_list': ['zopen', 'zhigh', 'zlow', 'zadjcp', 'zclose', 'zd_5', 'zd_10', 'zd_15', 'zd_20', 'zd_25', 'zd_30'], 'length_day': 10, 'initial_amount': 100000, 'transaction_cost_pct': 0.001, 'test_dynamic_path': 'data/portfolio_management/sz50/test.csv'}, 'environment': {'type': 'PortfolioManagementEnvironment'}, 'trainer': {'type': 'PortfolioManagementTrainer', 'agent_name': 'ppo', 'if_remove': False, 'configs': {'framework': 'tf2', 'num_workers': 0}, 'work_dir': 'work_dir/portfolio_management_sz50_ppo_ppo_adam_mse', 'epochs': 10}, 'loss': {'type': 'MSELoss'}, 'optimizer': {'type': 'Adam', 'lr': 0

## Step3: Build Dataset

In [12]:
# Build the dataset object described by the config.
dataset = build_dataset(cfg)
# Dump the dataset's instance attributes to check the resolved paths and settings.
print(vars(dataset))


{'kwargs': {'data_path': 'data/portfolio_management/sz50', 'train_path': 'data/portfolio_management/sz50/train.csv', 'valid_path': 'data/portfolio_management/sz50/valid.csv', 'test_path': 'data/portfolio_management/sz50/test.csv', 'tech_indicator_list': ['zopen', 'zhigh', 'zlow', 'zadjcp', 'zclose', 'zd_5', 'zd_10', 'zd_15', 'zd_20', 'zd_25', 'zd_30'], 'length_day': 10, 'initial_amount': 100000, 'transaction_cost_pct': 0.001, 'test_dynamic_path': 'data/portfolio_management/sz50/test.csv'}, 'data_path': '/Users/louison/PersonalTrade/data/portfolio_management/sz50', 'train_path': '/Users/louison/PersonalTrade/data/portfolio_management/sz50/train.csv', 'valid_path': '/Users/louison/PersonalTrade/data/portfolio_management/sz50/valid.csv', 'test_path': '/Users/louison/PersonalTrade/data/portfolio_management/sz50/test.csv', 'test_dynamic_path': '/Users/louison/PersonalTrade/data/portfolio_management/sz50/test.csv', 'tech_indicator_list': ['zopen', 'zhigh', 'zlow', 'zadjcp', 'zclose', 'zd_5',

## Step4: Build Trainer

In [9]:
from ray.tune.registry import register_env
import ray
from trademaster.environments.portfolio_management.environment import PortfolioManagementEnvironment
def env_creator(env_name):
    """Return the environment class registered under ``env_name``.

    Args:
        env_name: Name of the environment; only ``'portfolio_management'``
            is supported.

    Returns:
        The ``PortfolioManagementEnvironment`` class itself (not an instance);
        the caller instantiates it with an env config.

    Raises:
        NotImplementedError: If ``env_name`` is anything else.
    """
    if env_name == 'portfolio_management':
        return PortfolioManagementEnvironment
    # Name the rejected value so a typo in the env name is easy to spot.
    raise NotImplementedError(f"Unsupported environment name: {env_name!r}")
# Use a GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
work_dir = os.path.join(ROOT, cfg.trainer.work_dir)
# ignore_reinit_error=True lets this cell be re-run without ray raising on a second init.
ray.init(ignore_reinit_error=True)
# RLlib calls the registered factory with an env config; env_creator returns the class to instantiate.
register_env("portfolio_management", lambda config: env_creator("portfolio_management")(config))
# exist_ok=True replaces the exists()-then-makedirs() pair, which could fail if
# the directory appeared between the check and the creation.
os.makedirs(work_dir, exist_ok=True)
# Keep a copy of the config that was actually used next to the run's outputs.
cfg.dump(osp.join(work_dir, osp.basename(args.config)))

trainer = build_trainer(cfg, default_args=dict(dataset=dataset, device=device))
print(vars(trainer))

2023-05-11 18:16:45,555	INFO worker.py:973 -- Calling ray.init() again after it has already been called.


| Arguments Keep work_dir: /Users/louison/PersonalTrade/work_dir/portfolio_management_exchange_ppo_ppo_adam_mse
{'device': device(type='cpu'), 'configs': {'framework': 'tf2', 'num_workers': 0, 'env': 'portfolio_management', 'env_config': {'dataset': <trademaster.datasets.portfolio_management.dataset.PortfolioManagementDataset object at 0x1f3b4a2e0>, 'task': 'train'}}, 'agent_name': 'ppo', 'epochs': 10, 'dataset': <trademaster.datasets.portfolio_management.dataset.PortfolioManagementDataset object at 0x1f3b4a2e0>, 'work_dir': '/Users/louison/PersonalTrade/work_dir/portfolio_management_exchange_ppo_ppo_adam_mse', 'seeds_list': (12345,), 'random_seed': 12345, 'if_remove': False, 'num_threads': 8, 'trainer_name': <class 'ray.rllib.agents.ppo.ppo.PPOTrainer'>, 'checkpoints_path': '/Users/louison/PersonalTrade/work_dir/portfolio_management_exchange_ppo_ppo_adam_mse/checkpoints'}


## Step5: Train, Valid and Test

In [10]:
trainer.train_and_valid()



0
         date     close                   tic      open      high       low   
0  2000-01-04  0.656211     AUSTRALIAN DOLLAR  0.656211  0.656211  0.656211  \
0  2000-01-04  0.026932                  BAHT  0.026932  0.026932  0.026932   
0  2000-01-04  0.688800       CANADIAN DOLLAR  0.688800  0.688800  0.688800   
0  2000-01-04  0.138543          DANISH KRONE  0.138543  0.138543  0.138543   
0  2000-01-04  1.030928                  EURO  1.030928  1.030928  1.030928   
0  2000-01-04  0.642467                 FRANC  0.642467  0.642467  0.642467   
0  2000-01-04  0.128576      HONG KONG DOLLAR  0.128576  0.128576  0.128576   
0  2000-01-04  0.022962          INDIAN RUPEE  0.022962  0.022962  0.022962   
0  2000-01-04  0.119617                 KRONA  0.119617  0.119617  0.119617   
0  2000-01-04  0.105742          MEXICAN PESO  0.105742  0.105742  0.105742   
0  2000-01-04  0.032680     NEW TAIWAN DOLLAR  0.032680  0.032680  0.032680   
0  2000-01-04  0.519805     NEW ZELAND DOLLAR  0.5



Train Episode: [1/10]


KeyboardInterrupt: 

RLlib uses a distributed training strategy, so poor results might occur during training.

In [None]:
import ray
from ray.tune.registry import register_env
from trademaster.environments.portfolio_management.environment import PortfolioManagementEnvironment
# NOTE(review): this repeats the env_creator defined in the Step 4 cell;
# consider defining it once and reusing it here.
def env_creator(env_name):
    """Return the environment class registered under ``env_name``.

    Args:
        env_name: Name of the environment; only ``'portfolio_management'``
            is supported.

    Returns:
        The ``PortfolioManagementEnvironment`` class itself (not an instance);
        the caller instantiates it with an env config.

    Raises:
        NotImplementedError: If ``env_name`` is anything else.
    """
    if env_name == 'portfolio_management':
        return PortfolioManagementEnvironment
    # Name the rejected value so a typo in the env name is easy to spot.
    raise NotImplementedError(f"Unsupported environment name: {env_name!r}")
# Make sure ray is initialised (a repeated init is tolerated) and the env is registered before testing.
ray.init(ignore_reinit_error=True)
register_env("portfolio_management", lambda config: env_creator("portfolio_management")(config))
# The trailing semicolon keeps the notebook from echoing the call's return value.
trainer.test();

In [None]:
# Plot whatever the test environment's save_asset_memory() returns, passing alg="PPO" to the plot helper.
# NOTE(review): requires trainer.test() to have run first so that test_environment exists — confirm.
plot(trainer.test_environment.save_asset_memory(),alg="PPO")

In [None]:
# create a list of 100 numbers
