In [1]:
import os
import sys
import warnings

sys.path.append("..")

In [2]:
# pip install ray

In [3]:
# pip install rl_trading.zip

In [4]:
import gymnasium as Env
import numpy as np
import pandas as pd
import ray
import torch

from tqdm import trange
from copy import deepcopy
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.models import ModelCatalog
from ray.rllib.utils.metrics import ENV_RUNNER_RESULTS
from ray.tune.registry import register_env



In [5]:
# %load_ext autoreload
# %autoreload 2

from rl_trading.environments.fx_environment import FxTradingEnv
from rl_trading.models.fx_model import FXModel

Todo

- Select hourly, minute-level, or daily data
- Truncation in days
- Debug code
- Add features

- Write tests:

1. Unknown currency in initial portfolio
2. Missing currency in initial portfolio
3. Portfolio value in base currency
4. Portfolio weights sum to 1
5. `reset()` works
6. etc.

In [6]:
# Starting holdings keyed by lowercase currency code; every listed currency
# starts with the same numeric amount.
# "sgd" is currently disabled -- append it to the tuple below to enable it.
current_porfolio = dict.fromkeys(("usd", "eur", "jpy"), 100_000)

In [7]:
# Load the historical FX data table from a parquet file. The path is relative
# to the notebook's working directory, so the notebook must be started from
# the directory that contains `data/`.
historical_data = pd.read_parquet("data/FX_data.parquet.gzip")

In [8]:
# Keyword arguments forwarded to FxTradingEnv by `env_creator`: the price
# columns to use, the starting holdings, and the start timestamp.
env_config = dict(
    historical_prices=historical_data[["eurjpy", "eurusd", "usdjpy"]],
    initial_portfolio=current_porfolio,
    start_datetime=pd.Timestamp("2011-01-03 09:00:00"),
)

In [9]:
def env_creator(env_config):
    """
    Build an FX trading environment and preprocess its data.

    Args:
        env_config: Mapping whose items are forwarded as keyword arguments
            to ``FxTradingEnv``.

    Returns:
        The ``FxTradingEnv`` instance, after ``preprocess_data()`` has been
        called on it.
    """
    env = FxTradingEnv(**env_config)
    env.preprocess_data()
    return env

In [10]:
# Register the custom model and the environment factory under the string
# names that the PPO config refers to ("fx_model" and "fx_trading_env").
ModelCatalog.register_custom_model("fx_model", FXModel)
register_env("fx_trading_env", env_creator)

In [11]:
# To work with a cloned repository, additionally pass the following to
# ray.init():
# runtime_env={
#     "working_dir": os.path.dirname(os.path.abspath(".")),
#     "py_modules": [os.path.abspath(".")],
#     "excludes": [
#         "*.pyc", "__pycache__", "*.parquet.gzip",
#         "data/", "notebooks/", "sample_data/"],
# },
#
# ignore_reinit_error=True lets this cell be re-run in a live kernel without
# raising when Ray is already initialised.
ray.init(ignore_reinit_error=True)

2026-02-06 09:48:03,500	INFO worker.py:2023 -- Started a local Ray instance.


0,1
Python version:,3.12.12
Ray version:,2.52.1


In [13]:
# Number of env runners to request in the PPO config.
suggested_workers = 1

# Use every CUDA device torch can see, or none when CUDA is unavailable.
num_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0

# Last expression of the cell: display the chosen resources.
suggested_workers, num_gpus

(1, 1)

In [14]:
# PPO on the registered FX environment with the custom torch model, using
# RLlib's old API stack (RLModule/Learner and EnvRunner/ConnectorV2 disabled).
config = (
    PPOConfig()
    .environment(env="fx_trading_env", env_config=env_config)
    .framework("torch")
    .training(
        model={"custom_model": "fx_model"},
        lr=1e-3,
        train_batch_size=2048,
    )
    # .debugging(log_level="ERROR")
    .resources(num_gpus=num_gpus)
    .env_runners(
        num_cpus_per_env_runner=1,
        num_env_runners=suggested_workers,
        rollout_fragment_length="auto",
    )
    .api_stack(
        enable_rl_module_and_learner=False,
        enable_env_runner_and_connector_v2=False,
    )
)

In [15]:
# Build the PPO algorithm instance from the config above; the training loop
# calls `trainer.train()` on it.
trainer = config.build_algo()

2026-02-06 09:50:16,689	INFO tensorboardx.py:45 -- pip install "ray[tune]" to see TensorBoard files.
[36m(pid=12838)[0m E0000 00:00:1770371417.475755   12838 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(pid=12838)[0m E0000 00:00:1770371417.481867   12838 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[36m(pid=12838)[0m W0000 00:00:1770371417.498636   12838 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
[36m(pid=12838)[0m W0000 00:00:1770371417.498679   12838 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
[36m(pid=12838)[0m W0000 00:00:1770371417.498686   12838 computation_placer.cc:177] computation placer already

In [18]:
# Number of `trainer.train()` calls made by the training loop (one call per
# loop iteration). NOTE(review): despite the name, this counts training
# iterations rather than individual environment episodes.
num_episodes = 100

In [19]:
# warnings.filterwarnings('ignore')
# Per-iteration averages, appended in training order.
episode_lengths, episode_rewards = [], []

# NOTE(review): the recorded output of this cell shows a mean episode length
# of about 1.3 and a mean reward of about -1.0 on every logged iteration --
# episodes appear to end almost immediately. TODO: check the environment's
# termination logic.
for i in trange(num_episodes):
    # One training iteration; the result dict carries the env-runner stats.
    result = trainer.train()
    hist_stats = result[ENV_RUNNER_RESULTS]["hist_stats"]

    average_episode_reward = np.mean(hist_stats["episode_reward"])
    average_episode_length = np.mean(hist_stats["episode_lengths"])

    episode_lengths.append(average_episode_length)
    episode_rewards.append(average_episode_reward)

    # Report every fifth iteration (i = 0, 5, 10, ...).
    if not i % 5:
        print(f"Reward: {average_episode_reward}, Length: {average_episode_length}")

  1%|          | 1/100 [00:15<25:33, 15.49s/it]

Reward: -1.0004267313999937, Length: 1.3298701298701299


  6%|▌         | 6/100 [01:29<23:15, 14.85s/it]

Reward: -1.000229489810596, Length: 1.3411918795022921


 11%|█         | 11/100 [02:41<21:34, 14.55s/it]

Reward: -1.0002818789840107, Length: 1.3553937789543349


 16%|█▌        | 16/100 [03:53<20:11, 14.42s/it]

Reward: -1.0002237132410234, Length: 1.313662604233483


 21%|██        | 21/100 [05:04<18:57, 14.40s/it]

Reward: -1.0002976141625557, Length: 1.3473684210526315


 22%|██▏       | 22/100 [05:29<19:26, 14.96s/it]


KeyboardInterrupt: 