In [21]:
# Gym stuff
import gym_trading_env

# Stable baselines - RL stuff
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C

import gymnasium as gym
import numpy as np
# Processing libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [22]:
import pandas as pd
# Available in the github repo : examples/data/BTC_USD-Hourly.csv
url = "https://raw.githubusercontent.com/ClementPerroud/Gym-Trading-Env/main/examples/data/BTC_USD-Hourly.csv"
df = pd.read_csv(url, parse_dates=["date"], index_col= "date")
df.sort_index(inplace= True)
df.dropna(inplace= True)
df.drop_duplicates(inplace=True)

In [23]:
# Create the feature : ( close[t] - close[t-1] )/ close[t-1]
df["feature_close"] = df["close"].pct_change()

# Create the feature : open[t] / close[t]
df["feature_open"] = df["open"]/df["close"]

# Create the feature : high[t] / close[t]
df["feature_high"] = df["high"]/df["close"]

# Create the feature : low[t] / close[t]
df["feature_low"] = df["low"]/df["close"]

 # Create the feature : volume[t] / max(*volume[t-7*24:t+1])
df["feature_volume"] = df["Volume USD"] / df["Volume USD"].rolling(7*24).max()

df.dropna(inplace= True)

In [24]:
env = gym.make("TradingEnv",
        name= "BTCUSD",
        df = df, # Your dataset with your custom features
        positions = [ -1, 0, 1], # -1 (=SHORT), 0(=OUT), +1 (=LONG)
        trading_fees = 0.01/100, # 0.01% per stock buy / sell (Binance fees)
        borrow_interest_rate= 0.0003/100, # 0.0003% per timestep (one timestep = 1h here)
    )

In [25]:
model = A2C('MlpPolicy', env, verbose=1)


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [26]:
model.learn(total_timesteps=100000)

------------------------------------
| time/                 |          |
|    fps                | 311      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.923   |
|    explained_variance | -1.28    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.0118   |
|    value_loss         | 0.000319 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 332      |
|    iterations         | 200      |
|    time_elapsed       | 3        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.843   |
|    explained_variance | 0.118    |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | 0.0214   |
|    value_loss         | 0.000777 |
-

<stable_baselines3.a2c.a2c.A2C at 0x1b64a3a35e0>

In [28]:
model.save("E:\\github_clone\\Trader-RL\\models\\a2c_trading_model")

# Evaluation

In [32]:

terminated, truncated = False, False
obs, info = env.reset()

ep_rewards = []
cum_reward = 0.0

while not (terminated or truncated):
    action, _ = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    cum_reward += reward
    ep_rewards.append(reward)

print(f"Episode cumulative reward: {cum_reward:.6f}")

Market Return : 423.10%   |   Portfolio Return : 428.03%   |   
Episode cumulative reward: 1.663974


In [35]:
env.save_for_render(dir = "\\render_logs")

In [36]:
from gym_trading_env.renderer import Renderer
renderer = Renderer(render_logs_dir="E:\\github_clone\\Trader-RL\\render_logs")
renderer.run()

 * Serving Flask app 'gym_trading_env.renderer'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [16/Oct/2025 17:06:41] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [16/Oct/2025 17:06:43] "GET /update_data/BTCUSD_2025-10-16_17-04-52.pkl HTTP/1.1" 200 -
127.0.0.1 - - [16/Oct/2025 17:06:43] "GET /metrics HTTP/1.1" 200 -
