In [2]:
import os
import pandas as pd
import numpy as np
import torch
from ray.rllib.algorithms.algorithm import Algorithm
from env_trading import MultiAgentTradingEnv
from model_architecture import SharedLSTMModel
from ray.rllib.models import ModelCatalog
from utils import policy_mapping_fn
from ray.tune.registry import register_env

ModelCatalog.register_custom_model("shared_lstm_model", SharedLSTMModel)

In [6]:
# === Registre o ambiente customizado ===
def create_env(env_config):
    price_df = pd.read_csv(env_config["price_path"], index_col=0).astype(np.float32)
    return_df = pd.read_csv(env_config["return_path"], index_col=0).astype(np.float32)
    asset_types = env_config["asset_types"]

    return MultiAgentTradingEnv(
        price_df=price_df,
        log_return_df=return_df,
        asset_types=asset_types,
        initial_cash=env_config.get("initial_cash", 1e6),
        transaction_fee=env_config.get("transaction_fee", 0.001),
        future_discount=env_config.get("future_discount", 0.001),
    )

register_env("MultiAgentTradingEnv-v0", create_env)

In [3]:
price_df = pd.read_csv("data/processed/raw_prices.csv", index_col=0).astype(np.float32)
return_df = pd.read_csv("data/processed/returns_log.csv", index_col=0).astype(np.float32)

print("raw_prices shape:", price_df.shape)
print("returns_log shape:", return_df.shape)

print("raw_prices.columns:", price_df.columns.tolist())
print("returns_log.columns:", return_df.columns.tolist())

raw_prices shape: (667, 11)
returns_log shape: (667, 11)
raw_prices.columns: ['AAPL', 'AMZN', 'GOOGL', 'INTC', 'META', 'MSFT', 'NFLX', 'NVDA', 'TSLA', 'XOM', 'SPY']
returns_log.columns: ['AAPL', 'AMZN', 'GOOGL', 'INTC', 'META', 'MSFT', 'NFLX', 'NVDA', 'TSLA', 'XOM', 'SPY']


In [None]:
# === Registre o ambiente customizado ===
def create_env(env_config):
    price_df = pd.read_csv(env_config["price_path"], index_col=0).astype(np.float32)
    return_df = pd.read_csv(env_config["return_path"], index_col=0).astype(np.float32)

    # Garantir alinhamento
    common_columns = price_df.columns.intersection(return_df.columns)
    price_df = price_df[common_columns]
    return_df = return_df[common_columns]

    min_len = min(len(price_df), len(return_df))
    price_df = price_df.iloc[:min_len]
    return_df = return_df.iloc[:min_len]

    return MultiAgentTradingEnv(
        price_df=price_df,
        log_return_df=return_df,
        asset_types=env_config["asset_types"],
        initial_cash=env_config.get("initial_cash", 1e6),
        transaction_fee=env_config.get("transaction_fee", 0.001),
        future_discount=env_config.get("future_discount", 0.001),
    )

register_env("MultiAgentTradingEnv-v0", create_env)

# === Load checkpoint ===
checkpoint_path = "results/impala_trading_experiment/IMPALA_MultiAgentTradingEnv-v0_eaf2c_00000_0_2025-06-02_18-31-17/checkpoint_000009"
algo = Algorithm.from_checkpoint(checkpoint_path)

print(algo.get_policy("shared_policy").model.base_model._device)

# === Setup environment manually ===
price_path = "data/processed/raw_prices.csv"
return_path = "data/processed/returns_log.csv"
asset_types = ["equity"] * 10 + ["future"]

price_df = pd.read_csv(price_path, index_col=0).astype(np.float32).iloc[101:150]
return_df = pd.read_csv(return_path, index_col=0).astype(np.float32).iloc[101:150]

# Garante mesmas colunas e ordem
common_columns = price_df.columns.intersection(return_df.columns)
price_df = price_df[common_columns]
return_df = return_df[common_columns]

env = MultiAgentTradingEnv(
    price_df=price_df,
    log_return_df=return_df,
    asset_types=asset_types,
    initial_cash=1e6,
    transaction_fee=0.001,
    future_discount=0.001,
)

# === Run one episode ===
obs, _ = env.reset()
done = {"__all__": False}
total_rewards = {agent_id: 0 for agent_id in obs.keys()}

while not done["__all__"]:
    actions = {
        agent_id: algo.compute_single_action(obs[agent_id], policy_id=policy_mapping_fn(agent_id))[0]
        for agent_id in obs
    }
    obs, rewards, done, info = env.step(actions)

    for agent_id, reward in rewards.items():
        total_rewards[agent_id] += reward

# === Show results ===
print("\n🎯 Avaliação Final:")
for agent_id, reward in total_rewards.items():
    print(f"Agente {agent_id}: recompensa total = {reward:.2f}")




`UnifiedLogger` will be removed in Ray 2.7.
  return UnifiedLogger(config, logdir, loggers=None)
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
2025-06-02 20:22:59,886	INFO worker.py:1715 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
Exception in thread ray_print_logs:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/

AttributeError: 'SharedLSTMModel' object has no attribute 'base_model'

[33m(raylet)[0m A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker. RayTask ID: ffffffffffffffff6cd05e5f82dc87ff6072e1af01000000 Worker ID: 510b01474459eeb05f86c96948a71fe28d6e370fc3bfac13c9ad089e Node ID: 85fe2db495b998dd799ace93a9a7d1dbc2b65e0b1f54ac17cdbfa2e9 Worker IP address: 172.19.18.16 Worker port: 41055 Worker PID: 491109 Worker exit type: SYSTEM_ERROR Worker exit detail: Worker unexpectedly exits with a connection error code 2. End of file. There are some potential root causes. (1) The process is killed by SIGKILL by OOM killer due to high memory usage. (2) ray stop --force is called. (3) The worker is crashed unexpectedly due to SIGSEGV or other unexpected errors.
[33m(raylet)[0m A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker. RayTask ID: ffffffffffffffffd1fe57bf81673aa05692c3d701

In [14]:
price_df = pd.read_csv("data/processed/raw_prices.csv", index_col=0).astype(np.float32)
return_df = pd.read_csv("data/processed/returns_log.csv", index_col=0).astype(np.float32)

print("raw_prices shape:", price_df.shape)
print("returns_log shape:", return_df.shape)

print("raw_prices.columns:", price_df.columns.tolist())
print("returns_log.columns:", return_df.columns.tolist())

In [None]:
ray.init(ignore_reinit_error=True, include_dashboard=False)


In [None]:
ModelCatalog.register_custom_model("shared_lstm_model", SharedLSTMModel)


In [None]:
def create_env(env_config):
    price_df = pd.read_csv(env_config["price_path"], index_col=0)
    return_df = pd.read_csv(env_config["return_path"], index_col=0)
    asset_types = env_config["asset_types"]
    return MultiAgentTradingEnv(
        price_df=price_df,
        log_return_df=return_df,
        asset_types=asset_types,
        initial_cash=env_config.get("initial_cash", 1e6),
        transaction_fee=env_config.get("transaction_fee", 0.001),
        future_discount=env_config.get("future_discount", 0.001)
    )

register_env("MultiAgentTradingEnv-v0", create_env)

In [None]:
price_path = "./data/processed/price_data_eval.csv"
return_path = "./data/processed/log_return_data_eval.csv"
checkpoint_path = "./results/impala_trading_experiment/checkpoint_000100"
asset_types = ["equity"] * 10 + ["future"]


In [None]:
config = {
    "env": "MultiAgentTradingEnv-v0",
    "env_config": {
        "price_path": price_path,
        "return_path": return_path,
        "asset_types": asset_types,
    },
    "framework": "torch",
    "multiagent": {
        "policies": {
            "shared_policy": (
                None,
                gym.spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32),
                gym.spaces.Discrete(3),
                {}
            )
        },
        "policy_mapping_fn": lambda agent_id, episode, **kwargs: "shared_policy",
    },
    "model": {
        "custom_model": "shared_lstm_model",
        "max_seq_len": 20,
        "custom_model_config": {
            "lstm_cell_size": 256
        }
    }
}

agent = ImpalaTrainer(config=config)
agent.restore(checkpoint_path)

In [None]:
env = create_env(config["env_config"])
obs = env.reset()
done = {"__all__": False}

portfolio_values = []
dates = env.price_df.index

while not done["__all__"]:
    actions = {}
    for agent_id, agent_obs in obs.items():
        action, _, _ = agent.compute_single_action(agent_obs, policy_id="shared_policy")
        actions[agent_id] = action
    obs, rewards, done, info = env.step(actions)
    prices = env.price_df.iloc[env.current_step].values
    portfolio_value = env.cash + np.dot(env.positions, prices)
    portfolio_values.append(portfolio_value)

In [None]:
plt.figure(figsize=(10, 5))
plt.plot(dates[:len(portfolio_values)], portfolio_values, label="RL Portfolio Value")
plt.title("Evolução do Portfólio do Agente RL")
plt.xlabel("Data")
plt.ylabel("Valor do Portfólio")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()