# Construct a custom Environment for Pair Trading

Some existing examples of custom trading environments:
* [custom env example](https://colab.research.google.com/github/araffin/rl-tutorial-jnrr19/blob/sb3/5_custom_gym_env.ipynb#scrollTo=RqxatIwPOXe_)
* [StockTradingEnv by Adam King](https://github.com/notadamking/Stock-Trading-Environment)
* [FinRL](https://github.com/AI4Finance-Foundation/FinRL)

The goal is to construct a custom Env for pair trading.

This env restricts the behaviour of the RL learner to pair trading only.

In [None]:
import os
import pickle

from stable_baselines3 import PPO, A2C, DQN

from params import *
from utils.read2df import read2df, unify_dfs
from utils.clearlogs import clear_logs
from envs.env_rl_restrict_thres2 import RL_Restrict_TradeEnv

# Directory where this notebook stores its trained models and result files.
# Plain literal: the original f-string had no placeholders.
folder_path = "result/rl-restrict-thres"
# exist_ok=True keeps re-running the cell from failing when the directory exists.
os.makedirs(folder_path, exist_ok=True)

Load data from `preliminaries.ipynb`

In [None]:
# Pickled result produced by the preliminaries notebook; only its first two
# entries are used below (passed as symbols and as the key into `freqs`).
with open('result/cointncorr.pickle', 'rb') as coint_file:
    data = pickle.load(coint_file)

# Grid-search result: a (best_profit, best_params) pair.
with open('result/gridsearch/best_res.pickle', 'rb') as grid_file:
    best_res = pickle.load(grid_file)
best_profit, best_params = best_res

# Read the data frames for the selected symbols, restricted to the single
# frequency entry named by data[1].
pair_symbols = data[0]
freq_key = data[1]
dfs = read2df(symbols=pair_symbols, freqs={freq_key: freqs[freq_key]})

# Merge the per-symbol frames into one frame using the best `period` parameter.
tics, df = unify_dfs(dfs, symbols=pair_symbols, period=best_params['period'])

# Notebook cell output: preview the first rows.
df.head(10)

In [None]:
# Notebook cell output: show the 'OPEN_THRE' entry of the loaded best parameters
# (presumably the threshold for opening a position -- confirm against the grid search).
best_params['OPEN_THRE']

In [None]:
# Training window: start_date <= datetime < trade_date (half-open interval).
start_dt = datetime.strptime(start_date, date_format)
trade_dt = datetime.strptime(trade_date, date_format)
in_train = (df['datetime'] >= start_dt) & (df['datetime'] < trade_dt)
train = df[in_train]

# Test window: trade_date <= datetime < end_date, so the two sets do not overlap.
end_dt = datetime.strptime(end_date, date_format)
in_test = (df['datetime'] >= trade_dt) & (df['datetime'] < end_dt)
test = df[in_test]

# Training rows minus the `period` parameter and one more row.
max_train_len = len(train) - best_params['period'] - 1
print(f"The length of our training data: {len(train)}")

## Check with the Stable-Baselines3 `env_checker`

Check whether the env meets the requirements of `stable_baselines3`.

In [None]:
from stable_baselines3.common.env_checker import check_env
# Warning noted while running check_env (kept for reference):
# > UserWarning: The action space is not based off a numpy array. Typically this means it's either a Dict or Tuple space. This type of action space is currently not supported by Stable Baselines 3. You should try to flatten the action using a wrapper.
# Stable Baselines 3 does not support Dict/Tuple action spaces; only Box, Discrete,
# MultiDiscrete and MultiBinary are supported.
# Open question: is there another way to achieve the same functionality?

# Build the restricted pair-trading env on the training slice and run the
# Stable Baselines 3 environment checker on it.
env = RL_Restrict_TradeEnv(train)
check_env(env)

In [None]:
# Tensorboard log directory for this experiment.
# Plain literal: the original f-string had no placeholders.
log_path = "logs/restrict_thres/"
# Project helper; called here before training so the log directory is handled
# first (presumably it removes logs from earlier runs -- confirm in utils.clearlogs).
clear_logs(log_path)

# Read more about tensorboard
# https://github.com/tensorflow/tensorboard/blob/master/README.md
# https://www.tensorflow.org/tensorboard/get_started

In [None]:
# --- PPO ---
# Train a PPO agent with an MLP policy on `env`. gamma=1 means future rewards
# are not discounted; training metrics are written under `log_path`.
ppo_timesteps = 50000  # fixed budget (max_train_len is the alternative considered)
model_ppo = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    gamma=1,
    tensorboard_log=log_path,
)
model_ppo.learn(total_timesteps=ppo_timesteps)

# Persist the trained model next to the other results of this experiment.
model_ppo.save(f"{folder_path}/ppo_pairtrading")

In [None]:
# Best-effort removal of the PPO net-worth CSV so the rollout below starts
# without it (presumably the env writes this file -- confirm in the env code).
networth_csv = f"{folder_path}/networth_ppo.csv"
try:
    os.remove(networth_csv)
except OSError:
    # File missing or not removable: deliberately ignored.
    pass

# reset() yields a pair; only the first element (the observation) is used.
obs, _ = env.reset()

# Step the PPO model through the env for up to 20 iterations, rendering each one.
for i in range(20):
    env.render()
    # NOTE(review): predict() is called without deterministic=True, so actions
    # are presumably sampled stochastically -- confirm this is intended here.
    action, _states = model_ppo.predict(obs)
    # step() returns five values; the new observation feeds the next predict().
    # NOTE(review): terminated/truncated are not checked in the lines visible here.
    obs, rewards, terminated, truncated, info = env.step(action)