# Construct a custom Environment for Financial Trading

Some examples on the market
* [custom env example](https://colab.research.google.com/github/araffin/rl-tutorial-jnrr19/blob/sb3/5_custom_gym_env.ipynb#scrollTo=RqxatIwPOXe_)
* [StockTradingEnv by Adam King](https://github.com/notadamking/Stock-Trading-Environment)
* [FinRL](https://github.com/AI4Finance-Foundation/FinRL)

target is to construct a custom Env for pair trading

In [10]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces

Got inspiration from single asset trading scenario 

=> [StockTradingEnv by Adam King](https://github.com/notadamking/Stock-Trading-Environment)

In [55]:
# The lookback period for the observation space
PERIOD = 30
CASH = 10000

class PairTradingEnv(gym.Env):
    metadata = {'render.modes': ['console']}

    # for pair trading, we need to feed in two OHLCV dataframes
    def __init__(self, df0, df1):
        super().__init__()
        self.current_step = 0

        # get two datasets
        self.df0 = df0[['time', 'open', 'high', 'low', 'close', 'volume']]
        self.df1 = df1[['time', 'open', 'high', 'low', 'close', 'volume']]

        if not df0['time'].equals(df1['time']):
            raise ValueError("Two dataframe must have same time index")

        self.tic0 = df0['tic'].iloc[0]
        self.tic1 = df1['tic'].iloc[0]

        self.max_step = len(df0)-1

        # -1 means short 100%, 1 means long 100%, 0 means no action. And we need two actions for df1 n df2
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2, ), dtype=np.float32)

        # The data requires to be at least [time, open, high, low, close, volume]
        # Let's assume that we feed in previous 30 period data into the observation_space
        self.observation_space = spaces.Box(low=0, high=1.0, shape=(2, PERIOD, 6), dtype=np.float32)
    
    def reset(self, seed=None, options=None):
        super().reset(seed=seed, options=options)
        self.cash = CASH
        self.net_worth = CASH
        self.holding0 = 0
        self.holding1 = 0
        self.current_step = 0
        obs = np.vstack([self.df0.iloc[0], self.df1.iloc[0]]).astype(np.float32)

        return obs, {}

    def _take_action(self, action):
        current_price0 = self.df0['close'].iloc[self.current_step]
        current_price1 = self.df1['close'].iloc[self.current_step]
        print(current_price0, current_price1)

        if sum(action) > 1 or sum(action) < -1:
            action0 = action[0]/sum(action)
            action1= action[1]/sum(action)
            action = [action0, action1]

        # don't trade in the first observation period
        if self.current_step >= PERIOD:
            self.holding0 += action[0]*self.cash/current_price0
            self.holding1 += action[1]*self.cash/current_price0
            self.cash -= self.cash*sum(action)

        self.net_worth = self.cash + self.holding0*current_price0 + self.holding1*current_price1

    def _next_observation(self):
        if self.current_step <= PERIOD-1:
            obs_df0 = self.df0.iloc[0:self.current_step+1]
            obs_df1 = self.df1.iloc[0:self.current_step+1]
        else:
            obs_df0 = self.df0.iloc[self.current_step-PERIOD+1:self.current_step+1]
            obs_df1 = self.df1.iloc[self.current_step-PERIOD+1:self.current_step+1]

        obs = np.vstack([obs_df0, obs_df1])

        return obs

    def step(self, action):
        self._take_action(action)
        self.current_step += 1

        observation = self._next_observation()
        terminated = bool(self.current_step >= self.max_step)
        truncated = bool(self.net_worth <= 0)
        info = {}

        estimate_value0 = self.holding0 * df0['close'].iloc[self.current_step+1]
        estimate_value1 = self.holding1 * df1['close'].iloc[self.current_step+1]
        estimate_net_worth = self.cash + estimate_value0 + estimate_value1
        reward = estimate_net_worth - self.net_worth

        return observation, reward, terminated, truncated, info
    
    def render(self):
        profit = self.net_worth - CASH

        print(f"Current profit is {profit}")
        print(f"Current balance is {self.cash}")
        print(f"Current holding is {self.holding0} of {self.tic0} and {self.holding1} of {self.tic1}")

In [52]:
from stable_baselines3.common.env_checker import check_env
from utils.read2df import read2df

symbols = ['BTCUSDT', 'ETHUSDT', 'LTCUSDT', 'XMRUSDT', 'BNBUSDT', 'ADAUSDT', 'DOGEUSDT', 'SOLUSDT', 'TRXUSDT']
start_date = '2023-01-01'

freqs = {'1h':60, '2h':120, '4h':240, '6h':360, '8h':480, '12h':720, '1d':1440}

# dfs = read2df(symbols, freqs)
dfs = read2df(symbols, freqs)

df0 = dfs[0][dfs[0]['tic']=='BTCUSDT'].reset_index(drop=True)
df1 = dfs[0][dfs[0]['tic']=='ETHUSDT'].reset_index(drop=True)

In [53]:
env = PairTradingEnv(df0, df1)
check_env(env)

AssertionError: Your environment must inherit from the gymnasium.Env class cf. https://gymnasium.farama.org/api/env/

In [58]:
import random

env = PairTradingEnv(df0, df1)

obs, _ = env.reset()

print(env.observation_space)
print(env.action_space)
print(env.action_space.sample())

# Hardcoded best agent: always go left!
n_steps = 20

for step in range(n_steps):
    print(f"Step {step + 1}")
    obs, reward, terminated, truncated, info = env.step(action=[random.uniform(-1, 1) for _ in range(2)])
    done = terminated or truncated
    env.render()
    if done:
        print("Training Finished!", "reward=", reward)
        break

Box(0.0, 1.0, (2, 30, 6), float32)
Box(-1.0, 1.0, (2,), float32)
[ 0.11224975 -0.83228016]
Step 1
16529.67 1194.09
Current profit is 0.0
Current balance is 10000
Current holding is 0 of BTCUSDT and 0 of ETHUSDT
Step 2
16551.47 1196.02
Current profit is 0.0
Current balance is 10000
Current holding is 0 of BTCUSDT and 0 of ETHUSDT
Step 3
16548.19 1195.4
Current profit is 0.0
Current balance is 10000
Current holding is 0 of BTCUSDT and 0 of ETHUSDT
Step 4
16533.04 1194.04
Current profit is 0.0
Current balance is 10000
Current holding is 0 of BTCUSDT and 0 of ETHUSDT
Step 5
16521.85 1192.92
Current profit is 0.0
Current balance is 10000
Current holding is 0 of BTCUSDT and 0 of ETHUSDT
Step 6
16530.71 1194.54
Current profit is 0.0
Current balance is 10000
Current holding is 0 of BTCUSDT and 0 of ETHUSDT
Step 7
16539.79 1194.81
Current profit is 0.0
Current balance is 10000
Current holding is 0 of BTCUSDT and 0 of ETHUSDT
Step 8
16526.19 1195.06
Current profit is 0.0
Current balance is 10000