In [1]:
import os

import numpy as np
import pandas as pd
from sb3_contrib import QRDQN
from stable_baselines3 import DQN, PPO, A2C
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor

from envs.base import BaseTradingEnv
from envs.trading_env import SimpleTradingEnv
from models.features_extractor import CustomCNN
from utils import preprocessing
from utils.data_loader import DataLoader
from utils.feature_enginner import FeatureEngineer

### Prepare Dataset

In [2]:
# Read the ETH/USD price data (the path suggests 3600-second bars).
df = DataLoader.load_data("./data/3600/ethusd/2021-01-01.csv")

# Split by row position: the first half is used for training, the second half
# for evaluation.
train_df, eval_df = df[: len(df) // 2], df[len(df) // 2 :]

# Build the model inputs for each split with the same FeatureEngineer instance
# (created with an empty list argument).
feature_enginner = FeatureEngineer([])
train_features, eval_features = (
    feature_enginner.preprocessing(split) for split in (train_df, eval_df)
)

### Define Environment

#### Hyperparameters for the Environment

In [3]:
# Forwarded as `window_size` to both TradingEnv instances; presumably the number
# of past bars in each observation — TODO confirm against BaseTradingEnv.
window_size = 20
# Forwarded as `fee` to both TradingEnv instances; looks like a fractional
# per-trade fee (0.000665 = 0.0665 %) — confirm the unit against BaseTradingEnv.
fee = 0.000665

To change the set of available actions, define your own `Actions` enum and pass it to the environment.  
By default, only BUY and SELL are available.

In [4]:
from enum import Enum
class Actions(Enum):
    """Discrete actions handed to the trading environments via ``actions=``.

    Two members are enabled: ``Sell`` (0) and ``Buy`` (1).
    """

    Sell = 0
    Buy = 1
    # A third member is sketched here but left disabled:
    # Hold = 2

In [5]:
class TradingEnv(BaseTradingEnv):
    """``BaseTradingEnv`` subclass that supplies its own reward computation."""

    def _calculate_reward(self):
        """Return the reward for the current step.

        When the most recently closed trade has an ``ExitTime`` that matches
        ``self.current_datetime``, the reward is that trade's ``ReturnPct``.
        In every other case (including when no trade has been closed yet) the
        reward is ``self.position.profit_or_loss_pct``.
        """
        open_position_pct = self.position.profit_or_loss_pct

        # Nothing has been closed so far: fall back to the position's P&L.
        if self.closed_trades.empty:
            return open_position_pct

        last_trade = self.closed_trades.iloc[-1, :]
        # The latest closed trade did not exit on this step.
        if last_trade["ExitTime"] != self.current_datetime:
            return open_position_pct
        return last_trade["ReturnPct"]

    # Alternative reward kept for reference (relative change in wallet equity):
    # def _calculate_reward(self):
    #     reward = (self.wallet.equity - self.prev_equity) / self.prev_equity
    #     return reward

In [6]:
# One environment per data split; both share the same window size, fee and
# action set.
train_env = TradingEnv(
    train_df,
    train_features,
    window_size=window_size,
    fee=fee,
    actions=Actions,
)
eval_env = TradingEnv(
    eval_df,
    eval_features,
    window_size=window_size,
    fee=fee,
    actions=Actions,
)

### Define Agent

In [7]:
# Agent selection. The active configuration is A2C with an MLP policy; the
# commented-out alternatives below are kept for reference.
#
# PPO with the custom CNN features extractor:
#   policy_kwargs = dict(
#       features_extractor_class=CustomCNN,
#       features_extractor_kwargs=dict(features_dim=32),
#   )
#   model = PPO("CnnPolicy", train_env, verbose=0, tensorboard_log="./logs", policy_kwargs=policy_kwargs)
#
# PPO with an MLP policy:
#   model = PPO("MlpPolicy", train_env, verbose=1, tensorboard_log="./logs")
#
# DQN:
#   model_class = DQN
#   model = model_class("MlpPolicy", train_env, tensorboard_log="./logs", target_update_interval=500, learning_starts=500, learning_rate=1e-3)
model_class = A2C
model = model_class(
    "MlpPolicy",
    train_env,
    tensorboard_log="./logs",
)
# The class name doubles as the file stem for saved models and backtest plots.
model_name = type(model).__name__

### Training And Evaluation

In [None]:
# Periodic evaluation is configured through an explicit EvalCallback: the
# `eval_env` / `eval_freq` / `n_eval_episodes` arguments of `learn()` were
# deprecated in Stable-Baselines3 1.7 and removed in 2.0, so passing them
# raises a TypeError on current releases. The callback form works on both.
# The eval env is wrapped in Monitor so episode rewards/lengths are recorded
# for the evaluation report without an "env not wrapped" warning.
eval_callback = EvalCallback(
    Monitor(eval_env),
    eval_freq=3000,      # evaluate every 3000 training steps
    n_eval_episodes=1,   # a single pass over the evaluation data per check
)
model.learn(total_timesteps=30000, callback=eval_callback)

# Persist the trained agent, then drop the in-memory instance so the following
# cell exercises the saved file.
model.save(f"./results/{model_name}")
del model

Eval num_timesteps=3000, episode_reward=-1.32 +/- 0.00
Episode length: 2141.00 +/- 0.00
New best mean reward!
Eval num_timesteps=6000, episode_reward=0.09 +/- 0.00
Episode length: 2141.00 +/- 0.00
New best mean reward!
Eval num_timesteps=9000, episode_reward=0.44 +/- 0.00
Episode length: 2141.00 +/- 0.00
New best mean reward!
Eval num_timesteps=12000, episode_reward=0.87 +/- 0.00
Episode length: 2141.00 +/- 0.00
New best mean reward!
Eval num_timesteps=15000, episode_reward=0.60 +/- 0.00
Episode length: 2141.00 +/- 0.00
Eval num_timesteps=18000, episode_reward=0.14 +/- 0.00
Episode length: 2141.00 +/- 0.00
Eval num_timesteps=21000, episode_reward=-0.64 +/- 0.00
Episode length: 2141.00 +/- 0.00
Eval num_timesteps=24000, episode_reward=-0.92 +/- 0.00
Episode length: 2141.00 +/- 0.00
Eval num_timesteps=27000, episode_reward=-1.26 +/- 0.00
Episode length: 2141.00 +/- 0.00
Eval num_timesteps=30000, episode_reward=-0.55 +/- 0.00
Episode length: 2141.00 +/- 0.00


In [None]:
# Reload the trained agent from disk (the in-memory `model` was deleted right
# after saving). No environment is passed to `load` here.
model = model_class.load(f"./results/{model_name}")
print(model)

<stable_baselines3.a2c.a2c.A2C object at 0x000001471D518248>


In [None]:
from utils import backtest

In [None]:
# Backtest the reloaded agent on each split; plotting is enabled and each run
# is given its own plot filename under ./results.
stats_train = backtest(
    model,
    train_env,
    plot=True,
    plot_filename=f"./results/BackTest-{model_name}-train",
)
stats_eval = backtest(
    model,
    eval_env,
    plot=True,
    plot_filename=f"./results/BackTest-{model_name}-eval",
)

# Side-by-side table: one column per split, rows taken from the train stats
# index (the eval column is aligned onto it).
stats = stats_train.to_frame(name="train").assign(eval=stats_eval)
# stats = stats.drop(["Best Trade [%]", "Worst Trade [%]", "_strategy", "_equity_curve", "_trades"], axis=0)
stats

Unnamed: 0,train,eval
Start,2021-01-01 00:00:00,2021-04-01 00:00:00
End,2021-03-31 23:00:00,2021-06-30 00:00:00
Duration,89 days 23:00:00,90 days 00:00:00
Exposure Time [%],98.148148,98.334105
Equity Final [$],459175.01355,643434.843461
Equity Peak [$],1000000.0,1039083.301766
Return [%],-54.082499,-35.656516
Buy & Hold Return [%],149.948745,21.191475
Return (Ann.) [%],-95.511407,-82.747523
Volatility (Ann.) [%],7.914249,32.286747


In [None]:
# Show the per-trade records stored in the "_trades" row of the training column.
stats.loc["_trades", "train"]

Unnamed: 0,Size,EntryBar,ExitBar,EntryPrice,ExitPrice,PnL,ReturnPct,EntryTime,ExitTime,Duration
0,-1354,20,21,738.078851,740.11,-2750.175814,-0.002752,2021-01-01 20:00:00,2021-01-01 21:00:00,0 days 01:00:00
1,-1358,22,24,734.081511,739.54,-7412.628130,-0.007436,2021-01-01 22:00:00,2021-01-02 00:00:00,0 days 02:00:00
2,1342,25,26,737.119859,736.90,-295.050711,-0.000298,2021-01-02 01:00:00,2021-01-02 02:00:00,0 days 01:00:00
3,1355,27,29,730.145224,726.93,-4356.628384,-0.004404,2021-01-02 03:00:00,2021-01-02 05:00:00,0 days 02:00:00
4,-1348,30,31,730.533872,729.49,1407.139052,0.001429,2021-01-02 06:00:00,2021-01-02 07:00:00,0 days 01:00:00
...,...,...,...,...,...,...,...,...,...,...
353,-286,2096,2109,1683.779542,1764.40,-23057.451131,-0.047881,2021-03-29 08:00:00,2021-03-29 21:00:00,0 days 13:00:00
354,257,2111,2115,1784.085628,1834.60,12982.193475,0.028314,2021-03-29 23:00:00,2021-03-30 03:00:00,0 days 04:00:00
355,258,2116,2120,1829.015487,1813.70,-3951.395646,-0.008374,2021-03-30 04:00:00,2021-03-30 08:00:00,0 days 04:00:00
356,257,2121,2125,1818.108238,1796.50,-5553.317294,-0.011885,2021-03-30 09:00:00,2021-03-30 13:00:00,0 days 04:00:00
