In [1]:
import pandas as pd

from envs.reward_func import profit_per_trade_reward, profit_per_tick_reward
from utils.data_loader import DataLoader
from utils.preprocessor import Preprocessor
from utils.trainer import Trainer
from utils.backtest import backtest

In [2]:
# Fetch "^N225" price data at a one-day interval for 2015-01-01 .. 2021-08-31.
data = DataLoader.fetch_data("^N225", interval="1d", start="2015-01-01", end="2021-08-31")

# Chronological (unshuffled) split: the leading TRAIN_RATIO share of the rows is
# used for training and the remaining rows for evaluation. The boundary index is
# computed once so both slices are guaranteed to meet at the same row.
TRAIN_RATIO = 0.7
data_len = len(data)
split_idx = int(data_len * TRAIN_RATIO)
data_train = data.iloc[:split_idx, :]
data_eval = data.iloc[split_idx:, :]

# Print the first/last index label and the row count of each split as a sanity check.
print(f"Training Span: {data_train.index[0]} to {data_train.index[-1]}, Length: {len(data_train)}")
print(f"Evaluating Span: {data_eval.index[0]} to {data_eval.index[-1]}, Length: {len(data_eval)}")

[*********************100%***********************]  1 of 1 completed
Training Sapn: 2015-01-05 00:00:00 to 2019-08-26 00:00:00, Length: 1138
Evaluating Span: 2019-08-27 00:00:00 to 2021-08-30 00:00:00, Length: 488


In [3]:
# Run the project's preprocessing on both splits. The call returns four objects,
# unpacked as: training frame, training features, evaluation frame, evaluation features.
# NOTE(review): data_train / data_eval are rebound to the returned frames, so
# re-running this cell alone passes already-preprocessed frames back into
# Preprocessor.preprocessing. Re-run the data-loading cell first when repeating it.
# NOTE(review): the displayed features start later than the raw training data
# (2015-02-23 vs 2015-01-05); presumably leading rows are dropped for indicator
# warm-up — confirm in Preprocessor.
data_train, features_train, data_eval, features_eval = Preprocessor.preprocessing(data_train, data_eval)
# Last expression of the cell: show the training feature table.
features_train

Unnamed: 0_level_0,log_return,log_volume_diff,ADX,MACD Hist,RSI,StochRSI,MFI,BB High Gap,BB Low Gap
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-02-23,0.574973,0.033982,-0.831586,0.902504,1.473569,0.191292,0.751865,1.022038,1.065867
2015-02-24,0.579049,-0.036817,-0.653839,1.010102,1.658171,0.001717,1.258959,1.045276,1.342310
2015-02-25,-0.085435,-0.019289,-0.476995,0.985200,1.592380,-0.075575,1.223537,0.855127,1.354679
2015-02-26,0.847004,0.051187,-0.277161,1.064580,1.854243,-0.036929,1.796678,0.953544,1.816864
2015-02-27,0.044245,0.053456,-0.073923,1.040719,1.869259,0.001717,1.823826,0.796376,1.832177
...,...,...,...,...,...,...,...,...,...
2019-08-20,0.432947,-0.004228,0.646786,-0.493047,-0.867505,0.927175,-0.834875,-0.858756,-0.146687
2019-08-21,-0.233227,0.025994,0.680780,-0.273178,-0.970882,1.165676,-0.750992,-0.845619,-0.197382
2019-08-22,0.029217,0.013883,0.666402,-0.085561,-0.946114,1.534731,-0.262986,-0.707653,-0.171160
2019-08-23,0.311939,-0.033527,0.654168,0.135419,-0.723786,1.421801,0.305472,-0.438723,-0.074422


In [4]:
# Configuration dict handed to Trainer.get_agent_from_str (as user_config) and
# reused later to rebuild the training and evaluation environments.
# NOTE(review): the key names look like Ray RLlib trainer settings — confirm the
# exact semantics against the RLlib version in use.
config = {
    # Environment name plus the arguments the environment is constructed with:
    # the preprocessed training frame, its feature table, and the reward function.
    "env": "DescTradingEnv",
    "env_config": {
        "df": data_train,
        "features": features_train,
        "reward_func": profit_per_tick_reward,
    },

    # Evaluation settings. The evaluation environment receives the held-out
    # frame/features and the same reward function as training.
    "evaluation_num_workers": 1,
    "evaluation_interval": 1,
    "evaluation_num_episodes": 1,
    "evaluation_config": {
        "env_config": {
            "df": data_eval,
            "features": features_eval,
            "reward_func": profit_per_tick_reward,
        },
        # Exploration is switched off for evaluation runs.
        "explore": False,
    },

    "model": {
        # Only the keys listed here are overridden; presumably every other model
        # setting falls back to the library's defaults — TODO confirm.

        # Hidden-layer sizes of the fully connected network.
        "fcnet_hiddens": [128, 64],
        # "fcnet_activation": "relu",
    },
    
    "num_workers": 4,  # number of parallel workers
    "framework": "torch",
    "log_level": "WARN",  # alternatives noted by the author: "WARN", "DEBUG"
    "seed": 0,  # fixed seed so runs are repeatable
}

In [5]:
# Build the agent for the algorithm named "PPO" using the configuration above.
# The captured log shows this step starting Ray and taking roughly half a minute.
agent = Trainer.get_agent_from_str(algo="PPO", user_config=config)

2021-09-19 20:22:47,578	INFO services.py:1265 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2021-09-19 20:23:12,124	INFO trainable.py:109 -- Trainable.setup took 28.540 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [6]:
# Train the agent; the call returns the agent and a second value bound to
# last_checkpoint (presumably the most recent checkpoint saved during training —
# confirm in Trainer.learn).
# Note: 1e5 is a float literal (100000.0), so timesteps_total is passed as a float.
agent, last_checkpoint = Trainer.learn(agent, timesteps_total=1e5)

episodes_total:  36 timesptes_total:  40000
Train 2.71941070756698 | Eval 3.963699906119583
checkpoint saved at ./ray_results/PPO\checkpoint_000011\checkpoint-11
episodes_total:  72 timesptes_total:  80000
Train 9.372454931878 | Eval 5.673820986540628
checkpoint saved at ./ray_results/PPO\checkpoint_000021\checkpoint-21


In [7]:
# Build standalone environment instances for backtesting, reusing the exact
# environment name and env_config dicts that were given to the agent:
# the top-level "env_config" for training data, and the one nested under
# "evaluation_config" for the held-out data.
env_train = Trainer.get_env_from_str(config["env"], config["env_config"])
env_eval = Trainer.get_env_from_str(config["env"], config["evaluation_config"]["env_config"])

In [8]:
# Backtest the trained agent on each split and collect the results side by side.
# Each entry maps the output column name to (environment, plot file name); dict
# order is preserved, so "train" is backtested before "eval".
backtest_runs = {
    "train": (env_train, "PPO_train_backtest"),
    "eval": (env_eval, "PPO_eval_backtest"),
}

# Start from an empty frame and add one column per split; each backtest call
# also writes its plot because plot=True.
stats = pd.DataFrame()
for split_name, (split_env, plot_name) in backtest_runs.items():
    stats[split_name] = backtest(agent, split_env, plot=True, plot_filename=plot_name)

# Last expression of the cell: show the comparison table.
stats

Unnamed: 0,train,eval
Start,2015-02-23 00:00:00,2019-10-16 00:00:00
End,2019-08-26 00:00:00,2021-08-30 00:00:00
Duration,1645 days 00:00:00,684 days 00:00:00
Exposure Time [%],98.00905,95.164835
Equity Final [$],133695.589617,97725.510191
Equity Peak [$],133869.919695,105948.379914
Return [%],33.69559,-2.27449
Buy & Hold Return [%],9.715313,23.65678
Return (Ann.) [%],6.846806,-1.26618
Volatility (Ann.) [%],3.119912,4.463487
