In [6]:
import os
import pandas as pd
import numpy as np
import ray
import gym
import matplotlib.pyplot as plt
from ray.rllib.algorithms.impala import ImpalaConfig

ImpalaTrainer = ImpalaConfig().build

from ray.rllib.models import ModelCatalog
from ray.tune.registry import register_env

from env_trading import MultiAgentTradingEnv
from model_architecture import SharedLSTMModel

In [7]:
from ray.rllib.algorithms.impala import ImpalaConfig

print("ANTES:", type(ImpalaConfig))  # Esperado: <class 'type'>

ANTES: <class 'abc.ABCMeta'>


In [2]:
import sys
print(sys.executable)
print(sys.version)


/usr/bin/python3.10
3.10.17 (main, Apr  9 2025, 08:54:14) [GCC 13.3.0]


In [5]:
import ray
from ray.rllib.agents.impala import ImpalaTrainer


ModuleNotFoundError: No module named 'pkg_resources._vendor'

In [None]:
ray.init(ignore_reinit_error=True, include_dashboard=False)


In [None]:
ModelCatalog.register_custom_model("shared_lstm_model", SharedLSTMModel)


In [None]:
def create_env(env_config):
    price_df = pd.read_csv(env_config["price_path"], index_col=0)
    return_df = pd.read_csv(env_config["return_path"], index_col=0)
    asset_types = env_config["asset_types"]
    return MultiAgentTradingEnv(
        price_df=price_df,
        log_return_df=return_df,
        asset_types=asset_types,
        initial_cash=env_config.get("initial_cash", 1e6),
        transaction_fee=env_config.get("transaction_fee", 0.001),
        future_discount=env_config.get("future_discount", 0.001)
    )

register_env("MultiAgentTradingEnv-v0", create_env)

In [None]:
price_path = "./data/processed/price_data_eval.csv"
return_path = "./data/processed/log_return_data_eval.csv"
checkpoint_path = "./results/impala_trading_experiment/checkpoint_000100"
asset_types = ["equity"] * 10 + ["future"]


In [None]:
config = {
    "env": "MultiAgentTradingEnv-v0",
    "env_config": {
        "price_path": price_path,
        "return_path": return_path,
        "asset_types": asset_types,
    },
    "framework": "torch",
    "multiagent": {
        "policies": {
            "shared_policy": (
                None,
                gym.spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32),
                gym.spaces.Discrete(3),
                {}
            )
        },
        "policy_mapping_fn": lambda agent_id, episode, **kwargs: "shared_policy",
    },
    "model": {
        "custom_model": "shared_lstm_model",
        "max_seq_len": 20,
        "custom_model_config": {
            "lstm_cell_size": 256
        }
    }
}

agent = ImpalaTrainer(config=config)
agent.restore(checkpoint_path)

In [None]:
env = create_env(config["env_config"])
obs = env.reset()
done = {"__all__": False}

portfolio_values = []
dates = env.price_df.index

while not done["__all__"]:
    actions = {}
    for agent_id, agent_obs in obs.items():
        action, _, _ = agent.compute_single_action(agent_obs, policy_id="shared_policy")
        actions[agent_id] = action
    obs, rewards, done, info = env.step(actions)
    prices = env.price_df.iloc[env.current_step].values
    portfolio_value = env.cash + np.dot(env.positions, prices)
    portfolio_values.append(portfolio_value)

In [None]:
plt.figure(figsize=(10, 5))
plt.plot(dates[:len(portfolio_values)], portfolio_values, label="RL Portfolio Value")
plt.title("Evolução do Portfólio do Agente RL")
plt.xlabel("Data")
plt.ylabel("Valor do Portfólio")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()