# Construct a custom Environment for Pair Trading

Built upon RL_restrict

Feed in both spread data and two pricing data

However, the trade is not limited to open-close pattern

It's a freestyle for learner to decide the percentage we should hold. 

In [21]:
import warnings
warnings.filterwarnings('ignore')

import os
import csv
import numpy as np
import pandas as pd
import gymnasium as gym
import statsmodels.api as sm

from gymnasium import spaces
from datetime import date
from envs.env_gridsearch import kellycriterion
from sklearn.model_selection import train_test_split
from stable_baselines3.common.vec_env import DummyVecEnv
from utils.read2df import read2df

os.makedirs("result/rl-freeop", exist_ok=True)

for root, dirs, files in os.walk(f"result/rl-freeop/"):
    for file in files:
        os.remove(os.path.join(root, file))

Define data parameters

In [22]:
# symbols = ['BTCUSDT', 'ETHUSDT', 'LTCUSDT', 'XMRUSDT', 'BNBUSDT', 'ADAUSDT', 'DOGEUSDT', 'SOLUSDT', 'TRXUSDT']
symbols = ['BTCUSDT', 'BTCUSD', 'BTCTUSD', 'BTCUSDC', 'BTCBUSD', 'BTCDAI']
# symbols = ['USDCUSDT', 'DAIUSDT', 'TUSDUSDT', 'BUSDUSDT', 'USDCTUSD', 'USDCBUSD', 'DAIBUSD', 'TUSDBUSD', 'BUSDDAI']
start_date = '2022-01-01'
trade_date = '2023-01-01'

# freqs = {'1h':60, '2h':120, '4h':240, '6h':360, '8h':480, '12h':720, '1d':1440}
freqs = {'3m':3, '5m':5, '15m':15, '30m':30}

Download data from `binance-public-data`

In [23]:
%%capture
if symbols is None:
    !python binance-public-data/python/download-kline.py \
        -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1
else:
    !python binance-public-data/python/download-kline.py \
        -s {" ".join(symbols)} -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1

In [24]:
import pickle

dfs = read2df(symbols, freqs)

with open('result/cointncorr.pickle', 'rb') as pk:
    data = pickle.load(pk)

freq_position = list(freqs.keys()).index(data[1])

df0 = dfs[freq_position][dfs[freq_position]['tic']==data[0][0]].reset_index(drop=True)
df1 = dfs[freq_position][dfs[freq_position]['tic']==data[0][1]].reset_index(drop=True)

Set data before `trade_data` as training data, after `trade_data` is trade_data

In [25]:
train0 = df0[df0['datetime'] < trade_date]
train1 = df1[df1['datetime'] < trade_date]

test0 = df0[df0['datetime'] >= trade_date]
test1 = df1[df1['datetime'] >= trade_date]

print(f"The length of our training data: {len(train0)}")

The length of our training data: 130140


# Define the custom Environment

The behaviour of RL learner is restricted. 

The action is defined as discrete actions -1, 0, 1

-1 means short df0 long df1, 0 means close position, +1 means long df0 short df1

In [64]:
'''
Because it is freestyle, so we don't care about whether we have a position or not.
But we do need to make sure that if we are holding the maximum as we can then we shouldn't long further.
'''

# The lookback period for the observation space
PERIOD = 15 # Only look at the current price
CASH = 10000
isKelly = True

class PairTradingEnv(gym.Env):
    metadata = {'render.modes': ['console']}

    # for pair trading, we need to feed in two OHLCV dataframes
    def __init__(self, df0, df1, tc=0.001, period=PERIOD, cash=CASH, isKelly=True, model=""):
        super().__init__()

        if not df0['time'].equals(df1['time']):
            raise ValueError("Two dataframe must have same time index")

        self.cash = cash
        self.period = period
        self.model = model

        self.tic0 = df0['tic'].iloc[0]
        self.tic1 = df1['tic'].iloc[0]

        # transaction cost
        self.tc = tc
        self.isKelly = isKelly

        self.df0 = df0[['close', 'datetime']]
        self.df1 = df1[['close', 'datetime']]

        self.reward_range = (-np.inf, np.inf)

        # The action space should be something between -100% to 100%
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,))
        
        self.observation_space = spaces.Dict({
            "price_leg0": spaces.Box(low=0, high=np.inf, shape=(PERIOD,), dtype=np.float64),
            "price_leg1": spaces.Box(low=0, high=np.inf, shape=(PERIOD,), dtype=np.float64),
            "zscore":     spaces.Box(low=-np.inf, high=np.inf, shape=(PERIOD,), dtype=np.float64),
            "position":   spaces.Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float64),
        })

        # if the length is 35, then the index shall be 0~34
        self.max_steps = len(df0)-1
    
    def _kellycriterion(self, direct):
        # direct is +1 or -1
        spreads = pd.Series(self.residuals.iloc[-self.period:-1]) * direct   
        kc_f = kellycriterion(spreads)

        return kc_f

    def _next_observation(self):
        # The current step is always higher than the PERIOD as defined in the 

        prices0 = df0['close'].iloc[self.current_step-self.period: self.current_step]
        prices1 = df1['close'].iloc[self.current_step-self.period: self.current_step]
        
        model = sm.OLS(prices0, sm.add_constant(prices1)).fit()

        # positive residual means df0 > df1 at that point
        self.residuals = model.resid
        zscores = self.residuals / np.std(self.residuals)
        
        # print(len(np.array(prices0)), len(np.array(prices1)), len(np.array(zscores)))
        
        obs = {
            "price_leg0": np.array(prices0.iloc[-PERIOD:]),
            "price_leg1": np.array(prices1.iloc[-PERIOD:]),
            "zscore": np.array(zscores.iloc[-PERIOD:]),
            "position": np.array(self.action)
        }
        
        return obs

    def _take_action(self, action):

        self.curr_price0 = self.df0['close'].iloc[self.current_step]
        self.curr_price1 = self.df1['close'].iloc[self.current_step]

        max_amount0 = self.cash/self.curr_price0
        max_amount1 = self.cash/self.curr_price1

        curr_holding0 = self.holding0/max_amount0
        curr_holding1 = self.holding1/max_amount1

        # The action swing from -1 to 1, 1 means long curr0 short curr1, -1 means short curr0 long curr1
        self.action = action
        
        self.holding0 = max_amount0 * action
        self.holding1 = max_amount1 * action

        tc_fee0 = abs(action-curr_holding0)*max_amount0*self.curr_price0*self.tc 
        tc_fee1 = abs(-action-curr_holding1)*max_amount1*self.curr_price1*self.tc
        tc_fee =  tc_fee0 + tc_fee1

        # We record the net_worth from previous period to prev_net_worth
        self.prev_net_worth = self.net_worth
        self.net_worth = self.cash + self.holding0*self.curr_price0 + self.holding1*self.curr_price1 - tc_fee

    def step(self, action):
        self.action = action # for rendering
        self._take_action(action)
        self.current_step += 1

        self.observation = self._next_observation()
        reward = floatself.net_worth - self.prev_net_worth
        terminated = bool(self.current_step >= self.max_steps)
        truncated = bool(self.net_worth <= 0)
        info = {}

        return self.observation, reward, terminated, truncated, info

    def reset(self, seed=None):
        np.random.seed(seed)
        
        self.cash = self.cash
        self.net_worth = self.cash
        self.prev_net_worth = self.cash
        self.max_net_worth = self.cash
        self.position = 0
        self.holding0 = 0
        self.holding1 = 0
        self.render_step = 0
        self.order_amount0 = 0
        self.order_amount1 = 0
        self.action = [0]

        # self.current_step = self.period 
        self.current_step = np.random.randint(self.period, self.max_steps)

        obs = self._next_observation()
        
        print(obs)
        
        return self._next_observation(), {}
    
    def render(self):
        profit = self.net_worth - self.cash
        # print(self.df0['datetime'].iloc[self.current_step], self.net_worth)

        with open(f"result/rl-freeop/networth_{self.model}.csv", mode='a', newline='') as csv_f:
            if self.action != 1:
                writer = csv.writer(csv_f)
                writer.writerow(
                    [self.df0['datetime'].iloc[self.current_step], 
                    self.net_worth, self.action, self.position, 
                    self.curr_price0*self.order_amount0, self.curr_price1*self.order_amount1]
                )

## Check with baselin3 `env_checker`

Check if the env meets the requirements of `stable_baseline3`

In [65]:
from stable_baselines3.common.env_checker import check_env
# > UserWarning: The action space is not based off a numpy array. Typically this means it's either a Dict or Tuple space. This type of action space is currently not supported by Stable Baselines 3. You should try to flatten the action using a wrapper.
# Baseline 3 does not support Dict/Tuple action spaces....only Box Discrete MultiDiscrete MultiBinary
# Is there another way to achieve the same functionality?

env = PairTradingEnv(train0, train1)
check_env(env)

{'price_leg0': array([30175.02, 30157.94, 30153.88, 30171.94, 30141.94, 30150.  ,
       30164.26, 30183.23, 30247.31, 30228.01, 30271.25, 30273.48,
       30323.93, 30312.24, 30348.01]), 'price_leg1': array([30105.7 , 30105.7 , 30105.7 , 30145.4 , 30145.4 , 30113.66,
       30113.66, 30231.19, 30231.19, 30231.19, 30231.19, 30285.42,
       30285.42, 30290.81, 30290.81]), 'zscore': array([ 1.04702501,  0.43822105,  0.29350536, -0.25475647, -1.32408425,
       -0.0837943 ,  0.42449284, -2.42818758, -0.14410343, -0.83203764,
        0.70922015, -0.83955526,  0.95869764,  0.38018086,  1.65517603]), 'position': array([0])}
{'price_leg0': array([31280.  , 31262.01, 31312.01, 31403.01, 31427.32, 31397.97,
       31368.99, 31364.56, 31393.48, 31437.72, 31377.39, 31428.25,
       31420.28, 31405.34, 31432.9 ]), 'price_leg1': array([31259.98, 31260.04, 31320.33, 31402.91, 31407.25, 31361.82,
       31368.52, 31349.08, 31389.35, 31369.03, 31369.03, 31410.  ,
       31437.6 , 31411.21, 31419.46])

AssertionError: The reward returned by `step()` must be a float

## Do a test run with random generated actions

In [8]:
env = PairTradingEnv(train0, train1, tc=0, model="test")
obs, _ = env.reset()

print(f"observation_space: {env.observation_space}")
print(f"action_space: {env.action_space}")
print(f"action_space.sample: {env.action_space.sample()}")

n_steps = 5

for step in range(n_steps):
    obs, reward, terminated, truncated, info = env.step(action=env.action_space.sample())
    done = terminated or truncated
    env.render()
    if done:
        break

observation_space: Dict('price_leg0': Box(0.0, inf, (15,), float64), 'price_leg1': Box(0.0, inf, (15,), float64), 'zscore': Box(-inf, inf, (15,), float64))
action_space: Discrete(3)
action_space.sample: 0


## Models from stable_baselines3

Train with training data

In [9]:
'''PPO'''

from stable_baselines3 import PPO

env = PairTradingEnv(train0, train1, tc=0, model="ppo", isKelly=False)

model_ppo = PPO("MultiInputPolicy", env, verbose=0, tensorboard_log="logs")
model_ppo.learn(total_timesteps=100000)
model_ppo.save("result/rl-freeop/ppo_pairtrading")

In [10]:
'''A2C'''

from stable_baselines3 import A2C

env = PairTradingEnv(train0, train1, tc=0, model="a2c", isKelly=False)

model_a2c = A2C("MultiInputPolicy", env, verbose=0, tensorboard_log="logs")
model_a2c.learn(total_timesteps=100000)
model_a2c.save("result/rl-freeop/a2c_pairtrading")

In [11]:
'''DQN'''

from stable_baselines3 import DQN

env = PairTradingEnv(train0, train1, tc=0, model="dqn", isKelly=False)

model_dqn = DQN("MultiInputPolicy", env, verbose=0, tensorboard_log="logs")
model_dqn.learn(total_timesteps=100000)
model_dqn.save("result/rl-freeop/dqn_pairtrading")

## Use the model on Test data

In [12]:
# del model_ppo, model_a2c, model_dqn

model_ppo = PPO.load("result/rl-freeop/ppo_pairtrading.zip")
model_a2c = A2C.load("result/rl-freeop/a2c_pairtrading.zip")
model_dqn = DQN.load("result/rl-freeop/dqn_pairtrading.zip")

In [13]:
env = PairTradingEnv(test0, test1, tc=0, model="ppo")

env.reset()
while True:
    action, _states = model_ppo.predict(obs)
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    env.render()
    if terminated:
        print("Test Finished!")
        break
    elif truncated:
        print("bankrupted!")
        break

Test Finished!


In [14]:
env = PairTradingEnv(test0, test1, tc=0, model="a2c")

env.reset()
while True:
    action, _states = model_a2c.predict(obs)
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    env.render()
    if terminated:
        print("Test Finished!")
        break
    elif truncated:
        print("bankrupted!")
        break

Test Finished!


In [15]:
env = PairTradingEnv(test0, test1, tc=0, model="dqn")

env.reset()
while True:
    action, _states = model_dqn.predict(obs)
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    env.render()
    if terminated:
        print("Test Finished!")
        break
    elif truncated:
        print("bankrupted!")
        break

Test Finished!


### Analyze with PyFolio

In [16]:
folder_path = f"result/rl-freeop/"
os.remove(f"{folder_path}networth_test.csv") if os.path.exists(f"{folder_path}networth_test.csv") else None
csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]

best_res, best_model = None, None
for file_name in csv_files:
    file_path = os.path.join(folder_path, file_name)
    
    with open(file_path, 'r') as csv_file:
        csv_reader = csv.reader(csv_file)
        
        # Loop through the lines in the CSV file
        last_line = None
        for row in csv_reader:
            last_line = row  # Update last_line with the current row
    
    if best_res is None or float(best_res) < float(last_line[1]):
        best_res = last_line[1]
        best_model = file_name

    print(f"The ending capital of {file_name} is {last_line[0:2]}")

print(f"The best model is {best_model}")

The ending capital of networth_a2c.csv is ['2023-10-31 23:59:59.999000']
The ending capital of networth_dqn.csv is ['2023-10-31 23:53:59.999000']
The ending capital of networth_ppo.csv is ['2023-10-31 23:56:59.999000']
The best model is networth_a2c.csv


In [17]:
def get_return(networthcsv):
    returns = pd.read_csv(networthcsv, names=['datetime', 'returns', "action", "position", "order0", "order1"])
    returns['datetime'] = pd.to_datetime(returns['datetime'])
    returns.set_index('datetime', inplace=True)
    res_daily = returns.resample('D').mean()
    res_daily['returns'] = res_daily['returns'].pct_change()
    res_daily = res_daily.dropna()
    return res_daily

best_return = get_return(f'result/rl-freeop/{best_model}')

In [18]:
best_df = pd.read_csv(f'result/rl-freeop/{best_model}', names=["datetime", "networth", "action", "position", "order0", "order1"])
best_df = best_df[best_df['action']!=1]
best_df = best_df[best_df['order0']!=0]
best_df.head(20)

Unnamed: 0,datetime,networth,action,position,order0,order1


In [19]:
# # Calculate total orders count
# total_orders_count = best_df.shape[0]

# # Calculate won orders count
# won_orders_count = best_df[(best_df['order1'] == 1) & (best_df['position'] == 0)].shape[0]

# # Calculate lost orders count
# lost_orders_count = best_df[(best_df['order1'] == 2) & (best_df['position'] == 0)].shape[0]

# # Calculate Win/Loss order ratio
# win_loss_order_ratio = won_orders_count / lost_orders_count if lost_orders_count != 0 else np.inf

# # Calculate Avg order pnl
# avg_order_pnl = best_df['order0'].mean()

# # Calculate Avg order pnl won
# avg_order_pnl_won = best_df[(best_df['order1'] == 1) & (best_df['position'] == 0)]['order0'].mean()

# # Calculate Avg order pnl lost
# avg_order_pnl_lost = best_df[(best_df['order1'] == 2) & (best_df['position'] == 0)]['order0'].mean()

# # Calculate Avg long order pnl
# avg_long_order_pnl = best_df[(best_df['order1'] == 1) & (best_df['position'] == 2)]['order0'].mean()

# # Calculate Avg short order pnl
# avg_short_order_pnl = best_df[(best_df['order1'] == 1) & (best_df['position'] == 0)]['order1'].mean()

# # Print the calculated indices
# print("Total orders count:", total_orders_count)
# print("Won orders count:", won_orders_count)
# print("Lost orders count:", lost_orders_count)
# print("Win/Loss order ratio:", win_loss_order_ratio)
# print("Avg order pnl:", avg_order_pnl)
# print("Avg order pnl won:", avg_order_pnl_won)
# print("Avg order pnl lost:", avg_order_pnl_lost)
# print("Avg long order pnl:", avg_long_order_pnl)
# print("Avg short order pnl:", avg_short_order_pnl)


In [20]:
import pyfolio

pyfolio.tears.create_full_tear_sheet(best_return['returns'])

AttributeError: 'Series' object has no attribute 'iteritems'