# Construct a custom Environment for Pair Trading

Some existing examples of custom trading environments:
* [custom env example](https://colab.research.google.com/github/araffin/rl-tutorial-jnrr19/blob/sb3/5_custom_gym_env.ipynb#scrollTo=RqxatIwPOXe_)
* [StockTradingEnv by Adam King](https://github.com/notadamking/Stock-Trading-Environment)
* [FinRL](https://github.com/AI4Finance-Foundation/FinRL)

The goal is to construct a custom Env for pair trading.

This env restricts the behaviour of the RL learner to pair trading only.

## 1. Import the Custom Trading Env

In [None]:
import os
import pickle
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import trange
from stable_baselines3 import PPO, A2C, DQN

from params import *
from utils.read2df import read2df, unify_dfs
from utils.clearlogs import clear_logs
from envs.env_rl_restrict_thres import RL_Restrict_TradeEnv

# Output directory for this notebook's results (the saved model goes here).
folder_path = "result/rl-restrict-thres"
os.makedirs(folder_path, exist_ok=True)  # no error if it already exists

## 2. Prepare Trading Data
Load data from `preliminaries.ipynb`

In [None]:
# Restore the pickled results produced by earlier steps.
with open('result/cointncorr.pickle', 'rb') as coint_file:
    cointncorr = pickle.load(coint_file)

with open('result/gridsearch/best_res.pickle', 'rb') as best_file:
    best_profit, best_params = pickle.load(best_file)

# cointncorr[0] is forwarded as the symbols; cointncorr[1] is a key into `freqs`.
pair_symbols, pair_freq = cointncorr[0], cointncorr[1]
dfs = read2df(symbols=pair_symbols, freqs={pair_freq: freqs[pair_freq]})
tics, df = unify_dfs(dfs, symbols=pair_symbols, period=best_params['period'])

In [None]:
# Peek at the last 20 rows of `df`.
df.tail(20)

In [None]:
# Two stacked panels sharing the x-axis: spread on top, z-score below.
fig, panels = plt.subplots(2, 1, sharex=True)

for panel, column in zip(panels, ('spread', 'zscore')):
    panel.plot(df['datetime'], df[column], label=column)
    panel.legend()

plt.suptitle(f'The Spread between {cointncorr[0][0]} and {cointncorr[0][1]}')

# Tilt the tick labels on the x-axis.
plt.xticks(rotation=45)
plt.show()

In [None]:
# Parse each boundary date once instead of inline in the masks.
start_dt = datetime.strptime(start_date, date_format)
trade_dt = datetime.strptime(trade_date, date_format)
end_dt = datetime.strptime(end_date, date_format)

# Half-open windows: [start, trade) for training, [trade, end) for testing.
timestamps = df['datetime']
train = df[(timestamps >= start_dt) & (timestamps < trade_dt)]
test = df[(timestamps >= trade_dt) & (timestamps < end_dt)]

# Training step budget: 95% of the rows left after subtracting best_params['period'].
max_train_len = (len(train) - best_params['period']) * 0.95
print(f"The length of our training data: {len(train)}")

## 3. Check with stable-baselines3 `env_checker`

Check if the env meets the requirements of `stable_baselines3`

In [None]:
from stable_baselines3.common.env_checker import check_env

# Build the training environment on the `train` slice, take an initial
# observation, then let stable-baselines3's checker validate the env.
env = RL_Restrict_TradeEnv(train)
obs, _ = env.reset()
check_env(env)

Tensorboard

## 4. Train the Model with Trading Env

In [None]:
# Log directory for this experiment.
# NOTE(review): clear_logs presumably removes logs left by earlier runs — verify.
log_path = "logs/restrict_thres/"
clear_logs(log_path)

# Read more about tensorboard
# https://github.com/tensorflow/tensorboard/blob/master/README.md
# https://www.tensorflow.org/tensorboard/get_started

In [None]:
# --- PPO ---
# gamma=1 leaves future rewards undiscounted.
# tensorboard_log=log_path sends training metrics to the log directory that
# this notebook prepares and clears; without it nothing is written there.
# NOTE(review): stable-baselines3 needs the `tensorboard` package installed
# when tensorboard_log is set — confirm it is in the environment.
model_ppo = PPO(
    "MultiInputPolicy",
    env,
    verbose=1,
    gamma=1,
    batch_size=256,
    tensorboard_log=log_path,
)

# Train for the step budget computed from the training data, then persist the model.
model_ppo.learn(total_timesteps=int(max_train_len), progress_bar=True)
model_ppo.save(f"{folder_path}/ppo_pairtrading")

-------------------------------------------
| time/                   |               |
|    fps                  | 351           |
|    iterations           | 22            |
|    time_elapsed         | 128           |
|    total_timesteps      | 45056         |
| train/                  |               |
|    approx_kl            | 0.00039190365 |
|    clip_fraction        | 0.00293       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0852       |
|    explained_variance   | -0.000712     |
|    learning_rate        | 0.0003        |
|    loss                 | 157           |
|    n_updates            | 210           |
|    policy_gradient_loss | -0.00122      |
|    value_loss           | 337           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 353           |
|    iterations           | 23            |
|    time_elapsed         | 133           |
|    total_timesteps      | 47104         |
| train/                  |               |
|    approx_kl            | 0.00041572363 |
|    clip_fraction        | 0.00293       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0586       |
|    explained_variance   | 3.04e-05      |
|    learning_rate        | 0.0003        |
|    loss                 | 158           |
|    n_updates            | 220           |
|    policy_gradient_loss | -0.00142      |
|    value_loss           | 345           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 355           |
|    iterations           | 24            |
|    time_elapsed         | 138           |
|    total_timesteps      | 49152         |
| train/                  |               |
|    approx_kl            | 3.7014164e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0738       |
|    explained_variance   | 0.0027        |
|    learning_rate        | 0.0003        |
|    loss                 | 155           |
|    n_updates            | 230           |
|    policy_gradient_loss | -0.00028      |
|    value_loss           | 337           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 356           |
|    iterations           | 25            |
|    time_elapsed         | 143           |
|    total_timesteps      | 51200         |
| train/                  |               |
|    approx_kl            | 0.00014277536 |
|    clip_fraction        | 0.00151       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0816       |
|    explained_variance   | 0.00157       |
|    learning_rate        | 0.0003        |
|    loss                 | 146           |
|    n_updates            | 240           |
|    policy_gradient_loss | -0.000629     |
|    value_loss           | 320           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 357           |
|    iterations           | 26            |
|    time_elapsed         | 149           |
|    total_timesteps      | 53248         |
| train/                  |               |
|    approx_kl            | 0.00036866544 |
|    clip_fraction        | 0.00391       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0825       |
|    explained_variance   | 0.00215       |
|    learning_rate        | 0.0003        |
|    loss                 | 149           |
|    n_updates            | 250           |
|    policy_gradient_loss | -0.00163      |
|    value_loss           | 327           |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 358          |
|    iterations           | 27           |
|    time_elapsed         | 154          |
|    total_timesteps      | 55296        |
| train/                  |              |
|    approx_kl            | 0.0006094368 |
|    clip_fraction        | 0.00781      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0974      |
|    explained_variance   | 0.00102      |
|    learning_rate        | 0.0003       |
|    loss                 | 156          |
|    n_updates            | 260          |
|    policy_gradient_loss | -0.00168     |
|    value_loss           | 339          |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 359           |
|    iterations           | 28            |
|    time_elapsed         | 159           |
|    total_timesteps      | 57344         |
| train/                  |               |
|    approx_kl            | 0.00023004919 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0935       |
|    explained_variance   | 0.000815      |
|    learning_rate        | 0.0003        |
|    loss                 | 143           |
|    n_updates            | 270           |
|    policy_gradient_loss | -0.000699     |
|    value_loss           | 320           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 361           |
|    iterations           | 29            |
|    time_elapsed         | 164           |
|    total_timesteps      | 59392         |
| train/                  |               |
|    approx_kl            | 0.00041149367 |
|    clip_fraction        | 0.0042        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0555       |
|    explained_variance   | 0.000646      |
|    learning_rate        | 0.0003        |
|    loss                 | 155           |
|    n_updates            | 280           |
|    policy_gradient_loss | -0.000922     |
|    value_loss           | 339           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 362           |
|    iterations           | 30            |
|    time_elapsed         | 169           |
|    total_timesteps      | 61440         |
| train/                  |               |
|    approx_kl            | 0.00037754534 |
|    clip_fraction        | 0.00308       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0623       |
|    explained_variance   | 0.000199      |
|    learning_rate        | 0.0003        |
|    loss                 | 150           |
|    n_updates            | 290           |
|    policy_gradient_loss | -0.000999     |
|    value_loss           | 324           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 364           |
|    iterations           | 31            |
|    time_elapsed         | 174           |
|    total_timesteps      | 63488         |
| train/                  |               |
|    approx_kl            | 0.00036344014 |
|    clip_fraction        | 0.00244       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0732       |
|    explained_variance   | 3.53e-05      |
|    learning_rate        | 0.0003        |
|    loss                 | 154           |
|    n_updates            | 300           |
|    policy_gradient_loss | -0.00134      |
|    value_loss           | 339           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 365           |
|    iterations           | 32            |
|    time_elapsed         | 179           |
|    total_timesteps      | 65536         |
| train/                  |               |
|    approx_kl            | 0.00029333335 |
|    clip_fraction        | 0.00132       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0656       |
|    explained_variance   | -0.00111      |
|    learning_rate        | 0.0003        |
|    loss                 | 150           |
|    n_updates            | 310           |
|    policy_gradient_loss | -0.00059      |
|    value_loss           | 330           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 366           |
|    iterations           | 33            |
|    time_elapsed         | 184           |
|    total_timesteps      | 67584         |
| train/                  |               |
|    approx_kl            | 0.00020467435 |
|    clip_fraction        | 0.00322       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0599       |
|    explained_variance   | -0.000357     |
|    learning_rate        | 0.0003        |
|    loss                 | 156           |
|    n_updates            | 320           |
|    policy_gradient_loss | -0.0005       |
|    value_loss           | 342           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 367           |
|    iterations           | 34            |
|    time_elapsed         | 189           |
|    total_timesteps      | 69632         |
| train/                  |               |
|    approx_kl            | 0.00036096576 |
|    clip_fraction        | 0.00278       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0696       |
|    explained_variance   | 0.00048       |
|    learning_rate        | 0.0003        |
|    loss                 | 148           |
|    n_updates            | 330           |
|    policy_gradient_loss | -0.00108      |
|    value_loss           | 323           |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 368          |
|    iterations           | 35           |
|    time_elapsed         | 194          |
|    total_timesteps      | 71680        |
| train/                  |              |
|    approx_kl            | 0.0013443009 |
|    clip_fraction        | 0.00386      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0633      |
|    explained_variance   | -0.000103    |
|    learning_rate        | 0.0003       |
|    loss                 | 156          |
|    n_updates            | 340          |
|    policy_gradient_loss | -0.00219     |
|    value_loss           | 339          |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 367         |
|    iterations           | 36          |
|    time_elapsed         | 200         |
|    total_timesteps      | 73728       |
| train/                  |             |
|    approx_kl            | 0.001375996 |
|    clip_fraction        | 0.0111      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0726     |
|    explained_variance   | -0.000349   |
|    learning_rate        | 0.0003      |
|    loss                 | 156         |
|    n_updates            | 350         |
|    policy_gradient_loss | -0.00194    |
|    value_loss           | 339         |
-----------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 367           |
|    iterations           | 37            |
|    time_elapsed         | 206           |
|    total_timesteps      | 75776         |
| train/                  |               |
|    approx_kl            | 0.00097073324 |
|    clip_fraction        | 0.00884       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0752       |
|    explained_variance   | 0.000305      |
|    learning_rate        | 0.0003        |
|    loss                 | 156           |
|    n_updates            | 360           |
|    policy_gradient_loss | -0.0015       |
|    value_loss           | 344           |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 368          |
|    iterations           | 38           |
|    time_elapsed         | 211          |
|    total_timesteps      | 77824        |
| train/                  |              |
|    approx_kl            | 0.0013730555 |
|    clip_fraction        | 0.0144       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0921      |
|    explained_variance   | 0.000633     |
|    learning_rate        | 0.0003       |
|    loss                 | 151          |
|    n_updates            | 370          |
|    policy_gradient_loss | -0.00236     |
|    value_loss           | 329          |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 369           |
|    iterations           | 39            |
|    time_elapsed         | 216           |
|    total_timesteps      | 79872         |
| train/                  |               |
|    approx_kl            | 0.00025597433 |
|    clip_fraction        | 0.00137       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0737       |
|    explained_variance   | 0.000417      |
|    learning_rate        | 0.0003        |
|    loss                 | 159           |
|    n_updates            | 380           |
|    policy_gradient_loss | -0.000957     |
|    value_loss           | 348           |
-------------------------------------------


## 5. Create the Custom Test Env

In [None]:
# Separate environment instance built on the held-out `test` slice.
test_env = RL_Restrict_TradeEnv(test)

In [None]:
# Evaluation step budget: 95% of the test rows.
max_test_len = len(test) * 0.95

# Report the actual row count (as the training cell does), not the 95% budget.
print(f"The length of our test data: {len(test)}")

## 6. Test Env with Trained Model

In [None]:
# Remove the net-worth CSV left by a previous evaluation; a missing file is fine.
# NOTE(review): presumably the env appends to this file while stepping — verify.
try:
    os.remove(f"{folder_path}/networth_ppo.csv")
except OSError:
    pass

obs, _ = test_env.reset()

# Roll the trained policy through the test environment for at most
# int(max_test_len) steps.
# NOTE(review): predict() samples actions stochastically by default; pass
# deterministic=True if a reproducible evaluation is wanted — confirm intent.
for _step in trange(int(max_test_len)):
    action, _states = model_ppo.predict(obs)
    obs, rewards, terminated, truncated, info = test_env.step(action)
    # test_env.render()

    # Stop when the episode ends: stepping a finished env without a reset
    # is not defined by the Gymnasium API.
    if terminated or truncated:
        break

test_env.close()