# Construct a custom Environment for Pair Trading

Some examples on the market
* [custom env example](https://colab.research.google.com/github/araffin/rl-tutorial-jnrr19/blob/sb3/5_custom_gym_env.ipynb#scrollTo=RqxatIwPOXe_)
* [StockTradingEnv by Adam King](https://github.com/notadamking/Stock-Trading-Environment)
* [FinRL](https://github.com/AI4Finance-Foundation/FinRL)

Target is to construct a custom Env for pair trading

This env restrict the behaviour of RL learner to pair trading only

In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import csv
import numpy as np
import pandas as pd
import statsmodels.api as sm

from datetime import date
from envs.env_gridsearch import kellycriterion
from sklearn.model_selection import train_test_split
from stable_baselines3.common.vec_env import DummyVecEnv
from utils.read2df import read2df
from envs.env_rl_restrict import PairTradingEnv
from params import *

from stable_baselines3 import PPO, A2C, DQN

os.makedirs("result/rl-restrict", exist_ok=True)

# for root, dirs, files in os.walk(f"result/rl-restrict/"):
#     for file in files:
#         os.remove(os.path.join(root, file))

Load data from `preliminaries.ipynb`

In [2]:
import pickle

with open('result/cointncorr.pickle', 'rb') as pk:
    data = pickle.load(pk)

dfs = read2df(symbols=data[0], freqs={data[1]: freqs[data[1]]}, marketType='futures/um')

df0 = dfs[0][dfs[0]['tic']==data[0][0]].reset_index(drop=True)
df1 = dfs[0][dfs[0]['tic']==data[0][1]].reset_index(drop=True)

Set data before `trade_data` as training data, after `trade_data` is trade_data

In [3]:
train0 = df0[df0['datetime'] < trade_date]
train1 = df1[df1['datetime'] < trade_date]

test0 = df0[df0['datetime'] >= trade_date]
test1 = df1[df1['datetime'] >= trade_date]

print(f"The length of our training data: {len(train0)}")

The length of our training data: 902459


## Check with baselin3 `env_checker`

Check if the env meets the requirements of `stable_baseline3`

In [None]:
from stable_baselines3.common.env_checker import check_env
# > UserWarning: The action space is not based off a numpy array. Typically this means it's either a Dict or Tuple space. This type of action space is currently not supported by Stable Baselines 3. You should try to flatten the action using a wrapper.
# Baseline 3 does not support Dict/Tuple action spaces....only Box Discrete MultiDiscrete MultiBinary
# Is there another way to achieve the same functionality?

env = PairTradingEnv(train0, train1)
check_env(env)

## Do a test run with random generated actions

In [None]:
env = PairTradingEnv(train0, train1, tc=0, model="test")
obs, _ = env.reset()

print(f"observation_space: {env.observation_space}")
print(f"action_space: {env.action_space}")
print(f"action_space.sample: {env.action_space.sample()}")

n_steps = 20

for step in range(n_steps):
    obs, reward, terminated, truncated, info = env.step(action=env.action_space.sample())
    done = terminated or truncated
    env.render()
    if done:
        break

## Models from stable_baselines3

Train with training data

In [None]:
'''PPO'''

env = PairTradingEnv(train0, train1, tc=0, model="ppo")

model_ppo = PPO("MultiInputPolicy", env, verbose=0, tensorboard_log="logs")
model_ppo.learn(total_timesteps=30000)
model_ppo.save("result/rl-restrict/ppo_pairtrading")

In [None]:
'''A2C'''

from stable_baselines3 import A2C

env = PairTradingEnv(train0, train1, tc=0, model="a2c")

model_a2c = A2C("MultiInputPolicy", env, verbose=0)
model_a2c.learn(total_timesteps=30000)
model_a2c.save("result/rl-restrict/a2c_pairtrading")

In [None]:
'''DQN'''

from stable_baselines3 import DQN

env = PairTradingEnv(train0, train1, tc=0, model="dqn")

model_dqn = DQN("MultiInputPolicy", env, verbose=0)
model_dqn.learn(total_timesteps=30000)
model_dqn.save("result/rl-restrict/dqn_pairtrading")

## Use the model on Test data

In [None]:
# del model_ppo, model_a2c, model_dqn

model_ppo = PPO.load("result/rl-restrict/ppo_pairtrading")
model_a2c = A2C.load("result/rl-restrict/a2c_pairtrading")
model_dqn = DQN.load("result/rl-restrict/dqn_pairtrading")

In [None]:
try:
    os.remove("result/rl-restrict/networth_ppo.csv")
except OSError:
    pass

env = PairTradingEnv(test0, test1, tc=0, model="ppo", isKelly=True)
obs, _ = env.reset()

while True:
    action, _states = model_ppo.predict(obs)
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    env.render()
    if terminated:
        print("Test Finished!")
        break
    elif truncated:
        print("bankrupted!")
        break

In [None]:
try:
    os.remove("result/rl-restrict/networth_a2c.csv")
except OSError:
    pass

env = PairTradingEnv(test0, test1, tc=0, model="a2c")
obs, _ = env.reset()

while True:
    action, _states = model_a2c.predict(obs)
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    env.render()
    if terminated:
        print("Test Finished!")
        break
    elif truncated:
        print("bankrupted!")
        break

In [None]:
try:
    os.remove("result/rl-restrict/networth_dqn.csv")
except OSError:
    pass

env = PairTradingEnv(test0, test1, tc=0, model="dqn")
obs, _ = env.reset()

while True:
    action, _states = model_dqn.predict(obs)
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    env.render()
    if terminated:
        print("Test Finished!")
        break
    elif truncated:
        print("bankrupted!")
        break

### Analyze with PyFolio

In [4]:
folder_path = f"result/rl-restrict/"
os.remove(f"{folder_path}networth_test.csv") if os.path.exists(f"{folder_path}networth_test.csv") else None
csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]

best_res, best_model = None, None
for file_name in csv_files:
    file_path = os.path.join(folder_path, file_name)
    
    with open(file_path, 'r') as csv_file:
        csv_reader = csv.reader(csv_file)
        
        # Loop through the lines in the CSV file
        last_line = None
        for row in csv_reader:
            last_line = row  # Update last_line with the current row
    
    if best_res is None or float(best_res) < float(last_line[1]):
        best_res = last_line[1]
        best_model = file_name

    print(f"The ending capital of {file_name} is {last_line[0:2]}")

print(f"The best model is {best_model}")

The ending capital of networth_a2c.csv is ['2023-10-31 23:59:59.999000', '9997.304111344369']
The ending capital of networth_dqn.csv is ['2023-10-31 23:59:59.999000', '9955.08075602882']
The ending capital of networth_ppo.csv is ['2023-10-31 23:59:59.999000', '9972.573281128662']
The best model is networth_a2c.csv


In [5]:
from utils.rlmetrics import get_return

best_return = get_return(f'result/rl-restrict/{best_model}')

best_return.head(10)

Unnamed: 0_level_0,values,action,returns
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-07-03 23:07:59.999,10000.0,1,0.0
2023-07-03 23:08:59.999,10000.0,1,0.0
2023-07-03 23:09:59.999,10000.0,1,0.0
2023-07-03 23:10:59.999,10000.0,1,0.0
2023-07-03 23:11:59.999,10000.0,1,0.0
2023-07-03 23:12:59.999,10000.0,1,0.0
2023-07-03 23:13:59.999,10000.0,1,0.0
2023-07-03 23:14:59.999,10000.0,1,0.0
2023-07-03 23:15:59.999,10000.0,1,0.0
2023-07-03 23:16:59.999,10000.0,1,0.0


In [6]:
best_df = pd.read_csv(f'result/rl-restrict/{best_model}', names=["datetime", "networth"])

In [7]:
from utils.rlmetrics import get_metrics

get_metrics(best_return)

Total orders count: 172853
Total long action: 22
Total short action: 25
Won orders count: 22
Lost orders count: 25
Win/Loss order ratio: 0.88
Avg order pnl: -1.559614334779859e-09
Avg order pnl won: 2.1697352468909934e-05
Avg order pnl lost: -2.9877030837028862e-05
Avg long order pnl: -1.4325458382260084e-17
Avg short order pnl: -8.22387425648264e-18


In [8]:
import pyfolio

pyfolio.tears.create_full_tear_sheet(best_return['returns'])

AttributeError: 'Series' object has no attribute 'iteritems'