In [216]:
import numpy as np

import pandas as pd
# Available in the github repo : examples/data/BTC_USD-Hourly.csv
url = "https://raw.githubusercontent.com/ClementPerroud/Gym-Trading-Env/main/examples/data/BTC_USD-Hourly.csv"
df = pd.read_csv(url, parse_dates=["date"], index_col= "date")
df.sort_index(inplace= True)
df.dropna(inplace= True)
df.drop_duplicates(inplace=True)

In [217]:
# df is a DataFrame with columns : "open", "high", "low", "close", "Volume USD"

# Create the feature : ( close[t] - close[t-1] )/ close[t-1]
df["feature_close"] = df["close"].pct_change()

# Create the feature : open[t] / close[t]
df["feature_open"] = df["open"]/df["close"]

# Create the feature : high[t] / close[t]
df["feature_high"] = df["high"]/df["close"]

# Create the feature : low[t] / close[t]
df["feature_low"] = df["low"]/df["close"]

 # Create the feature : volume[t] / max(*volume[t-7*24:t+1])
df["feature_volume"] = df["Volume USD"] / df["Volume USD"].rolling(7*24).max()

df.dropna(inplace= True) # Clean again !
# Eatch step, the environment will return 5 inputs  : "feature_close", "feature_open", "feature_high", "feature_low", "feature_volume"

In [218]:
import gymnasium as gym
import environments

#from environments import TradingEnv

In [219]:
import gym_trading_env

In [220]:
#import gymnasium as gym
#import gym_trading_env
#env = gym.make('MultiDatasetTradingEnv',
#    dataset_dir = 'preprocessed_data/*.pkl',
#    positions=[-1, 0, 1],  # -1 (=SHORT), 0(=OUT), +1 (=LONG)
#)

In [221]:
# Run 10 episodes
#for _ in range(10):
  # At every episode, the env will pick a new dataset.
#  done, truncated = False, False
#  observation, info = env.reset()
#  while not done and not truncated:
#      position_index = env.action_space.sample() # Pick random position index
#      observation, reward, done, truncated, info = env.step(position_index)

In [222]:
from environments import TradingEnv

# Create the environment directly
env = TradingEnv(
    name="BTCUSD",
    df=df,  # Your dataset with your custom features
    positions=[-0.8, 0.0, 0.8],  # -1 (=SHORT), 0(=OUT), +1 (=LONG)
    trading_fees=0.01/100,  # 0.01% per stock buy/sell (Binance fees)
    borrow_interest_rate=0.0003/100,  # 0.0003% per timestep (one timestep = 1h here)
)

In [223]:
from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import VecEnv

In [224]:
policy = "MlpPolicy"

In [225]:
model = PPO(policy, env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [226]:
# Experiment with different hyperparameters based on your problem and environment
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    learning_rate=0.001,  # Adjust as needed
    batch_size=32,  # Adjust based on memory and learning speed
    gamma=0.99,  # Discount factor
)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [227]:
model.learn(total_timesteps=1000000)

-----------------------------
| time/              |      |
|    fps             | 1117 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 737         |
|    iterations           | 2           |
|    time_elapsed         | 5           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.012114517 |
|    clip_fraction        | 0.173       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -26.1       |
|    learning_rate        | 0.001       |
|    loss                 | -0.0417     |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00849    |
|    value_loss           | 0.00257     |
-----------------------------------------
----------------------------------

-----------------------------------------
| time/                   |             |
|    fps                  | 553         |
|    iterations           | 13          |
|    time_elapsed         | 48          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.004885153 |
|    clip_fraction        | 0.0296      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.02       |
|    explained_variance   | -0.0278     |
|    learning_rate        | 0.001       |
|    loss                 | -0.0306     |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.00154    |
|    value_loss           | 0.00068     |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 560          |
|    iterations           | 14           |
|    time_elapsed         | 51           |
|    total_timesteps      | 2

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | -2.89       |
| time/                   |             |
|    fps                  | 589         |
|    iterations           | 23          |
|    time_elapsed         | 79          |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.004374957 |
|    clip_fraction        | 0.0146      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.04       |
|    explained_variance   | 0.00387     |
|    learning_rate        | 0.001       |
|    loss                 | 0.0103      |
|    n_updates            | 220         |
|    policy_gradient_loss | 0.000204    |
|    value_loss           | 0.0003      |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3.31e+04

Market Return : 423.10%   |   Portfolio Return : -78.93%   |   
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | -2.23        |
| time/                   |              |
|    fps                  | 587          |
|    iterations           | 33           |
|    time_elapsed         | 114          |
|    total_timesteps      | 67584        |
| train/                  |              |
|    approx_kl            | 0.0034569954 |
|    clip_fraction        | 0.0322       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.852       |
|    explained_variance   | -0.0287      |
|    learning_rate        | 0.001        |
|    loss                 | -0.0191      |
|    n_updates            | 320          |
|    policy_gradient_loss | -0.000945    |
|    value_loss           | 0.000222     |
------------------------------------------
---------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | -2.23        |
| time/                   |              |
|    fps                  | 588          |
|    iterations           | 43           |
|    time_elapsed         | 149          |
|    total_timesteps      | 88064        |
| train/                  |              |
|    approx_kl            | 0.0059452783 |
|    clip_fraction        | 0.0788       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.665       |
|    explained_variance   | 0.0168       |
|    learning_rate        | 0.001        |
|    loss                 | 0.0109       |
|    n_updates            | 420          |
|    policy_gradient_loss | -0.00563     |
|    value_loss           | 0.000105     |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | -1.55        |
| time/                   |              |
|    fps                  | 599          |
|    iterations           | 53           |
|    time_elapsed         | 180          |
|    total_timesteps      | 108544       |
| train/                  |              |
|    approx_kl            | 0.0036359648 |
|    clip_fraction        | 0.0282       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.218       |
|    explained_variance   | -0.0227      |
|    learning_rate        | 0.001        |
|    loss                 | -0.00722     |
|    n_updates            | 520          |
|    policy_gradient_loss | -0.0019      |
|    value_loss           | 0.000134     |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | -1.55        |
| time/                   |              |
|    fps                  | 604          |
|    iterations           | 63           |
|    time_elapsed         | 213          |
|    total_timesteps      | 129024       |
| train/                  |              |
|    approx_kl            | 0.0007803405 |
|    clip_fraction        | 0.00859      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.115       |
|    explained_variance   | 0.00136      |
|    learning_rate        | 0.001        |
|    loss                 | -0.000805    |
|    n_updates            | 620          |
|    policy_gradient_loss | -0.000174    |
|    value_loss           | 0.000506     |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | -0.955       |
| time/                   |              |
|    fps                  | 606          |
|    iterations           | 73           |
|    time_elapsed         | 246          |
|    total_timesteps      | 149504       |
| train/                  |              |
|    approx_kl            | 0.0040235817 |
|    clip_fraction        | 0.0216       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.151       |
|    explained_variance   | -0.0414      |
|    learning_rate        | 0.001        |
|    loss                 | 0.00345      |
|    n_updates            | 720          |
|    policy_gradient_loss | -0.00174     |
|    value_loss           | 0.000179     |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | -0.633       |
| time/                   |              |
|    fps                  | 608          |
|    iterations           | 83           |
|    time_elapsed         | 279          |
|    total_timesteps      | 169984       |
| train/                  |              |
|    approx_kl            | 0.0012988915 |
|    clip_fraction        | 0.0118       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0932      |
|    explained_variance   | -2.91e-05    |
|    learning_rate        | 0.001        |
|    loss                 | 0.00125      |
|    n_updates            | 820          |
|    policy_gradient_loss | -0.000152    |
|    value_loss           | 0.000265     |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | -0.633       |
| time/                   |              |
|    fps                  | 612          |
|    iterations           | 93           |
|    time_elapsed         | 310          |
|    total_timesteps      | 190464       |
| train/                  |              |
|    approx_kl            | 0.0035487595 |
|    clip_fraction        | 0.0127       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0618      |
|    explained_variance   | 0.000762     |
|    learning_rate        | 0.001        |
|    loss                 | 0.00763      |
|    n_updates            | 920          |
|    policy_gradient_loss | -0.000651    |
|    value_loss           | 0.000263     |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | -0.312       |
| time/                   |              |
|    fps                  | 615          |
|    iterations           | 103          |
|    time_elapsed         | 342          |
|    total_timesteps      | 210944       |
| train/                  |              |
|    approx_kl            | 0.0011332983 |
|    clip_fraction        | 0.0136       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.118       |
|    explained_variance   | 0.0111       |
|    learning_rate        | 0.001        |
|    loss                 | 4.82e-05     |
|    n_updates            | 1020         |
|    policy_gradient_loss | -0.00062     |
|    value_loss           | 0.000522     |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | -0.312      |
| time/                   |             |
|    fps                  | 618         |
|    iterations           | 113         |
|    time_elapsed         | 374         |
|    total_timesteps      | 231424      |
| train/                  |             |
|    approx_kl            | 0.001774499 |
|    clip_fraction        | 0.0197      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.12       |
|    explained_variance   | -0.0105     |
|    learning_rate        | 0.001       |
|    loss                 | 0.00815     |
|    n_updates            | 1120        |
|    policy_gradient_loss | -0.000405   |
|    value_loss           | 0.000251    |
-----------------------------------------
Market Return : 423.10%   |   Portfolio Return : 111.46%   |   
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | -0.16       |
| time/                   |             |
|    fps                  | 613         |
|    iterations           | 123         |
|    time_elapsed         | 410         |
|    total_timesteps      | 251904      |
| train/                  |             |
|    approx_kl            | 0.005038224 |
|    clip_fraction        | 0.0354      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.19       |
|    explained_variance   | -0.0156     |
|    learning_rate        | 0.001       |
|    loss                 | -0.0067     |
|    n_updates            | 1220        |
|    policy_gradient_loss | -0.00233    |
|    value_loss           | 0.000239    |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.0449       |
| time/                   |              |
|    fps                  | 615          |
|    iterations           | 133          |
|    time_elapsed         | 442          |
|    total_timesteps      | 272384       |
| train/                  |              |
|    approx_kl            | 0.0014677405 |
|    clip_fraction        | 0.0201       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.15        |
|    explained_variance   | -0.00197     |
|    learning_rate        | 0.001        |
|    loss                 | -0.0116      |
|    n_updates            | 1320         |
|    policy_gradient_loss | -0.000997    |
|    value_loss           | 0.000407     |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.0449       |
| time/                   |              |
|    fps                  | 616          |
|    iterations           | 143          |
|    time_elapsed         | 474          |
|    total_timesteps      | 292864       |
| train/                  |              |
|    approx_kl            | 0.0023140018 |
|    clip_fraction        | 0.0224       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.118       |
|    explained_variance   | 0.00888      |
|    learning_rate        | 0.001        |
|    loss                 | 0.00163      |
|    n_updates            | 1420         |
|    policy_gradient_loss | -0.000943    |
|    value_loss           | 0.000378     |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.122        |
| time/                   |              |
|    fps                  | 618          |
|    iterations           | 153          |
|    time_elapsed         | 506          |
|    total_timesteps      | 313344       |
| train/                  |              |
|    approx_kl            | 0.0065306816 |
|    clip_fraction        | 0.0447       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.132       |
|    explained_variance   | -0.028       |
|    learning_rate        | 0.001        |
|    loss                 | 0.00852      |
|    n_updates            | 1520         |
|    policy_gradient_loss | -0.00566     |
|    value_loss           | 0.000352     |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.143        |
| time/                   |              |
|    fps                  | 618          |
|    iterations           | 163          |
|    time_elapsed         | 539          |
|    total_timesteps      | 333824       |
| train/                  |              |
|    approx_kl            | 0.0038061833 |
|    clip_fraction        | 0.0543       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.191       |
|    explained_variance   | -0.00225     |
|    learning_rate        | 0.001        |
|    loss                 | 0.00565      |
|    n_updates            | 1620         |
|    policy_gradient_loss | -0.00364     |
|    value_loss           | 0.000272     |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.143        |
| time/                   |              |
|    fps                  | 616          |
|    iterations           | 173          |
|    time_elapsed         | 574          |
|    total_timesteps      | 354304       |
| train/                  |              |
|    approx_kl            | 0.0031469376 |
|    clip_fraction        | 0.0429       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.304       |
|    explained_variance   | -0.00613     |
|    learning_rate        | 0.001        |
|    loss                 | 0.00224      |
|    n_updates            | 1720         |
|    policy_gradient_loss | -0.000747    |
|    value_loss           | 0.000118     |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.218       |
| time/                   |             |
|    fps                  | 614         |
|    iterations           | 183         |
|    time_elapsed         | 609         |
|    total_timesteps      | 374784      |
| train/                  |             |
|    approx_kl            | 0.004125744 |
|    clip_fraction        | 0.0505      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.237      |
|    explained_variance   | -0.00161    |
|    learning_rate        | 0.001       |
|    loss                 | 0.0292      |
|    n_updates            | 1820        |
|    policy_gradient_loss | -0.00121    |
|    value_loss           | 0.000275    |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.218        |
| time/                   |              |
|    fps                  | 613          |
|    iterations           | 193          |
|    time_elapsed         | 644          |
|    total_timesteps      | 395264       |
| train/                  |              |
|    approx_kl            | 0.0033490309 |
|    clip_fraction        | 0.0238       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.137       |
|    explained_variance   | 0.00305      |
|    learning_rate        | 0.001        |
|    loss                 | 0.00693      |
|    n_updates            | 1920         |
|    policy_gradient_loss | -0.000328    |
|    value_loss           | 0.000265     |
------------------------------------------
Market Return : 423.10%   |   Portfolio Return : 47.53%   |   
----------------------------------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.232       |
| time/                   |             |
|    fps                  | 611         |
|    iterations           | 203         |
|    time_elapsed         | 680         |
|    total_timesteps      | 415744      |
| train/                  |             |
|    approx_kl            | 0.004496223 |
|    clip_fraction        | 0.034       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.199      |
|    explained_variance   | 0.00754     |
|    learning_rate        | 0.001       |
|    loss                 | 0.00271     |
|    n_updates            | 2020        |
|    policy_gradient_loss | -0.00236    |
|    value_loss           | 0.000573    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.244        |
| time/                   |              |
|    fps                  | 605          |
|    iterations           | 213          |
|    time_elapsed         | 720          |
|    total_timesteps      | 436224       |
| train/                  |              |
|    approx_kl            | 0.0032306064 |
|    clip_fraction        | 0.0373       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.139       |
|    explained_variance   | 0.00183      |
|    learning_rate        | 0.001        |
|    loss                 | -0.00797     |
|    n_updates            | 2120         |
|    policy_gradient_loss | -0.00286     |
|    value_loss           | 0.000112     |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.244        |
| time/                   |              |
|    fps                  | 605          |
|    iterations           | 223          |
|    time_elapsed         | 754          |
|    total_timesteps      | 456704       |
| train/                  |              |
|    approx_kl            | 0.0067992695 |
|    clip_fraction        | 0.0615       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.225       |
|    explained_variance   | -0.000733    |
|    learning_rate        | 0.001        |
|    loss                 | 0.0129       |
|    n_updates            | 2220         |
|    policy_gradient_loss | -0.00285     |
|    value_loss           | 0.000541     |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.241       |
| time/                   |             |
|    fps                  | 604         |
|    iterations           | 233         |
|    time_elapsed         | 789         |
|    total_timesteps      | 477184      |
| train/                  |             |
|    approx_kl            | 0.016224064 |
|    clip_fraction        | 0.102       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.589      |
|    explained_variance   | -0.0216     |
|    learning_rate        | 0.001       |
|    loss                 | 0.102       |
|    n_updates            | 2320        |
|    policy_gradient_loss | -0.00124    |
|    value_loss           | 0.00027     |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31

Market Return : 423.10%   |   Portfolio Return : -78.51%   |   
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.122        |
| time/                   |              |
|    fps                  | 602          |
|    iterations           | 243          |
|    time_elapsed         | 826          |
|    total_timesteps      | 497664       |
| train/                  |              |
|    approx_kl            | 0.0040630177 |
|    clip_fraction        | 0.0695       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.512       |
|    explained_variance   | -0.00527     |
|    learning_rate        | 0.001        |
|    loss                 | 0.00121      |
|    n_updates            | 2420         |
|    policy_gradient_loss | -0.000844    |
|    value_loss           | 0.000212     |
------------------------------------------
---------------------------------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.122       |
| time/                   |             |
|    fps                  | 601         |
|    iterations           | 253         |
|    time_elapsed         | 861         |
|    total_timesteps      | 518144      |
| train/                  |             |
|    approx_kl            | 0.008895485 |
|    clip_fraction        | 0.0543      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.352      |
|    explained_variance   | -0.101      |
|    learning_rate        | 0.001       |
|    loss                 | 0.00323     |
|    n_updates            | 2520        |
|    policy_gradient_loss | -0.00352    |
|    value_loss           | 0.000115    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.082       |
| time/                   |             |
|    fps                  | 597         |
|    iterations           | 263         |
|    time_elapsed         | 901         |
|    total_timesteps      | 538624      |
| train/                  |             |
|    approx_kl            | 0.006153092 |
|    clip_fraction        | 0.0641      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.349      |
|    explained_variance   | -0.000725   |
|    learning_rate        | 0.001       |
|    loss                 | -0.00105    |
|    n_updates            | 2620        |
|    policy_gradient_loss | -0.00418    |
|    value_loss           | 0.000175    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.082       |
| time/                   |             |
|    fps                  | 596         |
|    iterations           | 273         |
|    time_elapsed         | 937         |
|    total_timesteps      | 559104      |
| train/                  |             |
|    approx_kl            | 0.006756286 |
|    clip_fraction        | 0.0669      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.401      |
|    explained_variance   | -0.0105     |
|    learning_rate        | 0.001       |
|    loss                 | 0.00718     |
|    n_updates            | 2720        |
|    policy_gradient_loss | -0.00245    |
|    value_loss           | 0.000529    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3.31e+04   |
|    ep_rew_mean          | 0.0409     |
| time/                   |            |
|    fps                  | 591        |
|    iterations           | 283        |
|    time_elapsed         | 980        |
|    total_timesteps      | 579584     |
| train/                  |            |
|    approx_kl            | 0.00460062 |
|    clip_fraction        | 0.036      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.211     |
|    explained_variance   | -0.019     |
|    learning_rate        | 0.001      |
|    loss                 | -0.0162    |
|    n_updates            | 2820       |
|    policy_gradient_loss | -0.0017    |
|    value_loss           | 0.000146   |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.0308      |
| time/                   |             |
|    fps                  | 581         |
|    iterations           | 293         |
|    time_elapsed         | 1031        |
|    total_timesteps      | 600064      |
| train/                  |             |
|    approx_kl            | 0.003394709 |
|    clip_fraction        | 0.0414      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.198      |
|    explained_variance   | -0.00606    |
|    learning_rate        | 0.001       |
|    loss                 | 0.00023     |
|    n_updates            | 2920        |
|    policy_gradient_loss | -0.00285    |
|    value_loss           | 0.000263    |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.0308      |
| time/                   |             |
|    fps                  | 581         |
|    iterations           | 303         |
|    time_elapsed         | 1067        |
|    total_timesteps      | 620544      |
| train/                  |             |
|    approx_kl            | 0.005129204 |
|    clip_fraction        | 0.049       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.198      |
|    explained_variance   | -0.00737    |
|    learning_rate        | 0.001       |
|    loss                 | -0.016      |
|    n_updates            | 3020        |
|    policy_gradient_loss | -0.00395    |
|    value_loss           | 0.000233    |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.0481      |
| time/                   |             |
|    fps                  | 583         |
|    iterations           | 313         |
|    time_elapsed         | 1099        |
|    total_timesteps      | 641024      |
| train/                  |             |
|    approx_kl            | 0.006215792 |
|    clip_fraction        | 0.0449      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.152      |
|    explained_variance   | -0.000411   |
|    learning_rate        | 0.001       |
|    loss                 | -0.00951    |
|    n_updates            | 3120        |
|    policy_gradient_loss | -0.000427   |
|    value_loss           | 0.000492    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.0481       |
| time/                   |              |
|    fps                  | 584          |
|    iterations           | 323          |
|    time_elapsed         | 1132         |
|    total_timesteps      | 661504       |
| train/                  |              |
|    approx_kl            | 0.0072374437 |
|    clip_fraction        | 0.0689       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.291       |
|    explained_variance   | 0.000427     |
|    learning_rate        | 0.001        |
|    loss                 | -0.0118      |
|    n_updates            | 3220         |
|    policy_gradient_loss | -0.00401     |
|    value_loss           | 0.000234     |
------------------------------------------
Market Return : 423.10%   |   Portfolio Return : 15.43%   |   
----------------------------------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.0529      |
| time/                   |             |
|    fps                  | 585         |
|    iterations           | 333         |
|    time_elapsed         | 1165        |
|    total_timesteps      | 681984      |
| train/                  |             |
|    approx_kl            | 0.004740346 |
|    clip_fraction        | 0.0417      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.227      |
|    explained_variance   | -0.0103     |
|    learning_rate        | 0.001       |
|    loss                 | 0.00502     |
|    n_updates            | 3320        |
|    policy_gradient_loss | -0.000984   |
|    value_loss           | 0.000259    |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.112        |
| time/                   |              |
|    fps                  | 586          |
|    iterations           | 343          |
|    time_elapsed         | 1197         |
|    total_timesteps      | 702464       |
| train/                  |              |
|    approx_kl            | 0.0053536724 |
|    clip_fraction        | 0.036        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.153       |
|    explained_variance   | 0.00145      |
|    learning_rate        | 0.001        |
|    loss                 | -0.00376     |
|    n_updates            | 3420         |
|    policy_gradient_loss | -0.00232     |
|    value_loss           | 0.000389     |
------------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mea

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.112        |
| time/                   |              |
|    fps                  | 587          |
|    iterations           | 353          |
|    time_elapsed         | 1230         |
|    total_timesteps      | 722944       |
| train/                  |              |
|    approx_kl            | 0.0067429235 |
|    clip_fraction        | 0.0581       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.298       |
|    explained_variance   | 0.00537      |
|    learning_rate        | 0.001        |
|    loss                 | 0.00846      |
|    n_updates            | 3520         |
|    policy_gradient_loss | -0.00165     |
|    value_loss           | 0.000313     |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.137       |
| time/                   |             |
|    fps                  | 587         |
|    iterations           | 363         |
|    time_elapsed         | 1265        |
|    total_timesteps      | 743424      |
| train/                  |             |
|    approx_kl            | 0.006955264 |
|    clip_fraction        | 0.0657      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.263      |
|    explained_variance   | -0.00547    |
|    learning_rate        | 0.001       |
|    loss                 | -0.0284     |
|    n_updates            | 3620        |
|    policy_gradient_loss | -0.00349    |
|    value_loss           | 0.000171    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.1         |
| time/                   |             |
|    fps                  | 587         |
|    iterations           | 373         |
|    time_elapsed         | 1299        |
|    total_timesteps      | 763904      |
| train/                  |             |
|    approx_kl            | 0.006305235 |
|    clip_fraction        | 0.0555      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.217      |
|    explained_variance   | -0.00214    |
|    learning_rate        | 0.001       |
|    loss                 | -0.0221     |
|    n_updates            | 3720        |
|    policy_gradient_loss | -0.00327    |
|    value_loss           | 0.000323    |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3.31e+04

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.1          |
| time/                   |              |
|    fps                  | 587          |
|    iterations           | 383          |
|    time_elapsed         | 1334         |
|    total_timesteps      | 784384       |
| train/                  |              |
|    approx_kl            | 0.0044615306 |
|    clip_fraction        | 0.0302       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0808      |
|    explained_variance   | -0.00358     |
|    learning_rate        | 0.001        |
|    loss                 | 5.36e-06     |
|    n_updates            | 3820         |
|    policy_gradient_loss | -0.00259     |
|    value_loss           | 0.000111     |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.147       |
| time/                   |             |
|    fps                  | 588         |
|    iterations           | 393         |
|    time_elapsed         | 1368        |
|    total_timesteps      | 804864      |
| train/                  |             |
|    approx_kl            | 0.010014884 |
|    clip_fraction        | 0.0774      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.307      |
|    explained_variance   | -0.0113     |
|    learning_rate        | 0.001       |
|    loss                 | 0.0038      |
|    n_updates            | 3920        |
|    policy_gradient_loss | -0.00322    |
|    value_loss           | 0.000174    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.147       |
| time/                   |             |
|    fps                  | 588         |
|    iterations           | 403         |
|    time_elapsed         | 1401        |
|    total_timesteps      | 825344      |
| train/                  |             |
|    approx_kl            | 0.009456221 |
|    clip_fraction        | 0.0512      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.209      |
|    explained_variance   | -0.00597    |
|    learning_rate        | 0.001       |
|    loss                 | -0.00204    |
|    n_updates            | 4020        |
|    policy_gradient_loss | -0.0032     |
|    value_loss           | 0.000279    |
-----------------------------------------
Market Return : 423.10%   |   Portfolio Return : 121.28%   |   
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.173       |
| time/                   |             |
|    fps                  | 590         |
|    iterations           | 413         |
|    time_elapsed         | 1433        |
|    total_timesteps      | 845824      |
| train/                  |             |
|    approx_kl            | 0.009010049 |
|    clip_fraction        | 0.0713      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.302      |
|    explained_variance   | -0.00839    |
|    learning_rate        | 0.001       |
|    loss                 | 0.00129     |
|    n_updates            | 4120        |
|    policy_gradient_loss | -0.00213    |
|    value_loss           | 0.000622    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.197       |
| time/                   |             |
|    fps                  | 591         |
|    iterations           | 423         |
|    time_elapsed         | 1465        |
|    total_timesteps      | 866304      |
| train/                  |             |
|    approx_kl            | 0.010228186 |
|    clip_fraction        | 0.0854      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.336      |
|    explained_variance   | -0.0138     |
|    learning_rate        | 0.001       |
|    loss                 | -0.0272     |
|    n_updates            | 4220        |
|    policy_gradient_loss | -0.00433    |
|    value_loss           | 0.000126    |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.197       |
| time/                   |             |
|    fps                  | 592         |
|    iterations           | 433         |
|    time_elapsed         | 1497        |
|    total_timesteps      | 886784      |
| train/                  |             |
|    approx_kl            | 0.013140325 |
|    clip_fraction        | 0.131       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.655      |
|    explained_variance   | -0.00116    |
|    learning_rate        | 0.001       |
|    loss                 | -0.0105     |
|    n_updates            | 4320        |
|    policy_gradient_loss | -0.0059     |
|    value_loss           | 0.00049     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.154       |
| time/                   |             |
|    fps                  | 593         |
|    iterations           | 443         |
|    time_elapsed         | 1529        |
|    total_timesteps      | 907264      |
| train/                  |             |
|    approx_kl            | 0.012239219 |
|    clip_fraction        | 0.13        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.573      |
|    explained_variance   | -0.0314     |
|    learning_rate        | 0.001       |
|    loss                 | -0.0109     |
|    n_updates            | 4420        |
|    policy_gradient_loss | -0.00444    |
|    value_loss           | 0.000215    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+

Market Return : 423.10%   |   Portfolio Return : -71.02%   |   
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.105       |
| time/                   |             |
|    fps                  | 593         |
|    iterations           | 453         |
|    time_elapsed         | 1563        |
|    total_timesteps      | 927744      |
| train/                  |             |
|    approx_kl            | 0.009783761 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.505      |
|    explained_variance   | -0.0171     |
|    learning_rate        | 0.001       |
|    loss                 | -0.0214     |
|    n_updates            | 4520        |
|    policy_gradient_loss | -0.00589    |
|    value_loss           | 0.000151    |
-----------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.105        |
| time/                   |              |
|    fps                  | 593          |
|    iterations           | 463          |
|    time_elapsed         | 1597         |
|    total_timesteps      | 948224       |
| train/                  |              |
|    approx_kl            | 0.0056185266 |
|    clip_fraction        | 0.0736       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.397       |
|    explained_variance   | -0.0559      |
|    learning_rate        | 0.001        |
|    loss                 | 0.0116       |
|    n_updates            | 4620         |
|    policy_gradient_loss | -0.00133     |
|    value_loss           | 9.09e-05     |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31e+04     |
|    ep_rew_mean          | 0.0929       |
| time/                   |              |
|    fps                  | 594          |
|    iterations           | 473          |
|    time_elapsed         | 1630         |
|    total_timesteps      | 968704       |
| train/                  |              |
|    approx_kl            | 0.0106566055 |
|    clip_fraction        | 0.115        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.51        |
|    explained_variance   | 0.00673      |
|    learning_rate        | 0.001        |
|    loss                 | -0.0171      |
|    n_updates            | 4720         |
|    policy_gradient_loss | -0.00183     |
|    value_loss           | 0.0002       |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.31e+04    |
|    ep_rew_mean          | 0.0929      |
| time/                   |             |
|    fps                  | 594         |
|    iterations           | 483         |
|    time_elapsed         | 1662        |
|    total_timesteps      | 989184      |
| train/                  |             |
|    approx_kl            | 0.007496462 |
|    clip_fraction        | 0.0847      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.468      |
|    explained_variance   | 0.0221      |
|    learning_rate        | 0.001       |
|    loss                 | 0.00367     |
|    n_updates            | 4820        |
|    policy_gradient_loss | -0.00149    |
|    value_loss           | 0.000537    |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.31

<stable_baselines3.ppo.ppo.PPO at 0x7f24cab632d0>

In [228]:
model.save("my_trading_agent")

In [229]:
# Create the environment directly
env2 = TradingEnv(
    name="BTCUSD",
    df=df,  # Your dataset with your custom features
    positions=[-0.8, 0, 0.8],  # -1 (=SHORT), 0(=OUT), +1 (=LONG)
    trading_fees=0.01/100,  # 0.01% per stock buy/sell (Binance fees)
    borrow_interest_rate=0.0003/100,  # 0.0003% per timestep (one timestep = 1h here)
)

In [230]:
# Run an episode until it ends :
done, truncated = False, False
obs, info = env2.reset()
while not done and not truncated:
    action, _ = model.predict(obs)
    obs, reward, done, truncated, info = env2.step(action)
    # ... (evaluate further)

Market Return : 423.10%   |   Portfolio Return : 240.58%   |   


In [231]:
# At the end of the episode you want to render
env2.unwrapped.save_for_render(dir="render_logs")