In [1]:
#multiagent ppo
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from sklearn.preprocessing import StandardScaler

# Function to load and normalize data
def load_and_normalize_data(train_file, test_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
    df_test['timestamp'] = pd.to_datetime(df_test['timestamp'])

    scaler = StandardScaler()
    columns_to_normalize = ['open', 'high', 'low', 'close', 'volume']

    df_train[columns_to_normalize] = scaler.fit_transform(df_train[columns_to_normalize])
    df_test[columns_to_normalize] = scaler.transform(df_test[columns_to_normalize])

    return df_train, df_test, scaler

# Single-Agent Trading Environment for Training
class SingleAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(SingleAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0 = neutral, 1 = long, -1 = short
        self.entry_price = 0
        self.trades = []
        self.scaler = scaler

        # Action space: hold (0), buy (1), sell (2)
        self.action_space = spaces.Discrete(3)
        # Observation space: Stock prices (open, high, low, close, volume)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(window_size, 5), dtype=np.float32)

    def reset(self, **kwargs):
        self.current_step = 0
        self.position = 0
        self.balance = self.initial_balance
        self.trades = []
        self.entry_price = 0
        return self._get_observation(), {}

    def _get_observation(self):
        return self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)

    def step(self, action):
        reward = 0
        current_price = self.data.iloc[self.current_step]['close']
        if action == 1:  # Buy
            if self.position == 0:
                self.position = 1
                self.entry_price = current_price
            elif self.position == -1:  # Close short
                reward = self.entry_price - current_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0
        elif action == 2:  # Sell
            if self.position == 0:
                self.position = -1
                self.entry_price = current_price
            elif self.position == 1:  # Close long
                reward = current_price - self.entry_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return self._get_observation(), reward, terminated, truncated, {}

# Mixed Multi-Agent Testing Environment
class MixedMultiAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(MixedMultiAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.scaler = scaler

        # Two teams with two agents each
        self.num_teams = 2
        self.agents_per_team = 2
        self.num_agents = self.num_teams * self.agents_per_team

        # Initialize agent balances, positions, and trades
        self.balances = [initial_balance] * self.num_agents
        self.positions = [0] * self.num_agents
        self.entry_prices = [0] * self.num_agents
        self.trades = [[] for _ in range(self.num_agents)]

    def reset(self, **kwargs):
        self.current_step = 0
        self.balances = [self.initial_balance] * self.num_agents
        self.positions = [0] * self.num_agents
        self.entry_prices = [0] * self.num_agents
        self.trades = [[] for _ in range(self.num_agents)]
        return [self._get_observation() for _ in range(self.num_agents)], {}

    def _get_observation(self):
        return self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)

    def step(self, actions):
        rewards = [0] * self.num_agents
        current_price = self.data.iloc[self.current_step]['close']
        for i, action in enumerate(actions):
            if action == 1:  # Buy
                if self.positions[i] == 0:
                    self.positions[i] = 1
                    self.entry_prices[i] = current_price
                elif self.positions[i] == -1:  # Close short
                    reward = self.entry_prices[i] - current_price
                    self.balances[i] += reward
                    rewards[i] = reward
                    self.positions[i] = 0
                    self.trades[i].append(reward)
            elif action == 2:  # Sell
                if self.positions[i] == 0:
                    self.positions[i] = -1
                    self.entry_prices[i] = current_price
                elif self.positions[i] == 1:  # Close long
                    reward = current_price - self.entry_prices[i]
                    self.balances[i] += reward
                    rewards[i] = reward
                    self.positions[i] = 0
                    self.trades[i].append(reward)

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return [self._get_observation() for _ in range(self.num_agents)], rewards, terminated, truncated, {}

# Calculate individual metrics
def calculate_metrics(trades, initial_balance, final_balance):
    total_profit = final_balance - initial_balance
    cumulative_return = total_profit / initial_balance
    
    # Calculate positive and negative trades for profit factor
    positive_trades = [trade for trade in trades if trade > 0]
    negative_trades = [trade for trade in trades if trade < 0]
    profit_factor = sum(positive_trades) / abs(sum(negative_trades)) if negative_trades else float('inf')

    win_rate = len(positive_trades) / len(trades) if trades else 0

    # Calculate Sharpe Ratio
    sharpe_ratio = np.mean(trades) / np.std(trades) if np.std(trades) != 0 else 0

    # Sortino Ratio (uses only negative trades as downside deviation)
    downside_std = np.std([trade for trade in trades if trade < 0])
    sortino_ratio = np.mean(trades) / downside_std if downside_std != 0 else 0

    # Maximum Drawdown
    cumulative_balance = np.cumsum(trades)
    running_max = np.maximum.accumulate(cumulative_balance)
    drawdown = running_max - cumulative_balance
    max_drawdown = np.max(drawdown) if len(drawdown) > 0 else 0

    return {
        "Total Profit": total_profit,
        "Cumulative Return": cumulative_return,
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Maximum Drawdown": max_drawdown
    }

# Sharpe Ratio-weighted aggregation
def aggregate_metrics_sharpe_weighted(metrics_list):
    # Filter out agents with non-positive Sharpe Ratios
    positive_sharpe_metrics = [m for m in metrics_list if m["Sharpe Ratio"] > 0]
    total_sharpe = sum(m["Sharpe Ratio"] for m in positive_sharpe_metrics)
    
    # If no agents have a positive Sharpe Ratio, return zeros for all metrics
    if total_sharpe == 0:
        return {metric: 0 for metric in metrics_list[0]}
    
    combined_metrics = {
        "Total Profit": sum(m["Total Profit"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
        "Cumulative Return": sum(m["Cumulative Return"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
        "Win Rate": sum(m["Win Rate"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
        "Profit Factor": sum(m["Profit Factor"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
        "Sharpe Ratio": sum(m["Sharpe Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
        "Sortino Ratio": sum(m["Sortino Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
        "Maximum Drawdown": sum(m["Maximum Drawdown"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
    }
    return combined_metrics

# Train and evaluate each agent
def train_and_evaluate():
    train_file = 'LPL_TRAINING.csv'
    test_file = 'LPL_TESTING.csv'
    df_train_normalized, df_test_normalized, scaler = load_and_normalize_data(train_file, test_file)

    # Track training metrics
    training_metrics = []

    # Train each agent independently in single-agent environments
    models = []
    for i in range(4):  # 4 agents
        env_train = SingleAgentEnv(df_train_normalized, window_size=10, scaler=scaler)
        model = PPO("MlpPolicy", env_train, verbose=1)
        model.learn(total_timesteps=50000)
        models.append(model)

        # Calculate training metrics for each agent
        final_balance = env_train.balance
        metrics = calculate_metrics(env_train.trades, env_train.initial_balance, final_balance)
        training_metrics.append(metrics)
        print(f"\n--- Agent {i+1} Training Metrics ---")
        for metric, value in metrics.items():
            print(f"{metric}: {value}")

    # Aggregate training metrics with Sharpe Ratio weighting
    combined_training_metrics = aggregate_metrics_sharpe_weighted(training_metrics)
    print("\n=== Combined Training Metrics for All Agents (Sharpe Ratio Weighted) ===")
    for metric, value in combined_training_metrics.items():
        print(f"{metric}: {value}")

    # Test in the mixed multi-agent environment
    env_test = MixedMultiAgentEnv(df_test_normalized, window_size=10, scaler=scaler)
    obs, _ = env_test.reset()
    done = False
    while not done:
        actions = [model.predict(obs[i])[0] for i, model in enumerate(models)]
        obs, rewards, done, truncated, _ = env_test.step(actions)

    # Calculate and display testing metrics for each agent in the testing environment
    testing_metrics = []
    for i in range(4):  # 4 agents
        final_balance = env_test.balances[i]
        metrics = calculate_metrics(env_test.trades[i], env_test.initial_balance, final_balance)
        testing_metrics.append(metrics)
        print(f"\n--- Agent {i+1} Testing Metrics ---")
        for metric, value in metrics.items():
            print(f"{metric}: {value}")

    # Aggregate testing metrics with Sharpe Ratio weighting
    combined_testing_metrics = aggregate_metrics_sharpe_weighted(testing_metrics)
    print("\n=== Combined Testing Metrics for All Agents (Sharpe Ratio Weighted) ===")
    for metric, value in combined_testing_metrics.items():
        print(f"{metric}: {value}")

# Run the training and evaluation
train_and_evaluate()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------
| time/              |      |
|    fps             | 2044 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1510        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.011675965 |
|    clip_fraction        | 0.109       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -5.21       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00488     |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00875    |
|    value_loss         

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 4.78e+03   |
|    ep_rew_mean          | 9.93       |
| time/                   |            |
|    fps                  | 1305       |
|    iterations           | 12         |
|    time_elapsed         | 18         |
|    total_timesteps      | 24576      |
| train/                  |            |
|    approx_kl            | 0.01091681 |
|    clip_fraction        | 0.107      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.97      |
|    explained_variance   | 0.394      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0102     |
|    n_updates            | 110        |
|    policy_gradient_loss | -0.0136    |
|    value_loss           | 0.011      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 4.78e+03   |
|    ep_rew_mean          | 20.6       |
| time/                   |            |
|    fps                  | 1300       |
|    iterations           | 22         |
|    time_elapsed         | 34         |
|    total_timesteps      | 45056      |
| train/                  |            |
|    approx_kl            | 0.00900091 |
|    clip_fraction        | 0.0939     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.848     |
|    explained_variance   | 0.0303     |
|    learning_rate        | 0.0003     |
|    loss                 | -0.027     |
|    n_updates            | 210        |
|    policy_gradient_loss | -0.00844   |
|    value_loss           | 0.0112     |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 4.78e+03   |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_mean          | 4.35        |
| time/                   |             |
|    fps                  | 1265        |
|    iterations           | 7           |
|    time_elapsed         | 11          |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.009546319 |
|    clip_fraction        | 0.0845      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.07       |
|    explained_variance   | 0.679       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0208     |
|    n_updates            | 60          |
|    policy_gradient_loss | -0.0116     |
|    value_loss           | 0.0105      |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 4.78e+03

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_mean          | 16.2        |
| time/                   |             |
|    fps                  | 1274        |
|    iterations           | 17          |
|    time_elapsed         | 27          |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.015569208 |
|    clip_fraction        | 0.0989      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.796      |
|    explained_variance   | 0.178       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.025      |
|    n_updates            | 160         |
|    policy_gradient_loss | -0.00597    |
|    value_loss           | 0.00875     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+

-----------------------------
| time/              |      |
|    fps             | 1950 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1652        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008915354 |
|    clip_fraction        | 0.0297      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -1.48       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0102     |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00456    |
|    value_loss           | 0.012       |
-----------------------------------------
----------------------------------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_mean          | 11.6        |
| time/                   |             |
|    fps                  | 1402        |
|    iterations           | 12          |
|    time_elapsed         | 17          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.008281537 |
|    clip_fraction        | 0.0813      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.951      |
|    explained_variance   | 0.22        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0279     |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.00764    |
|    value_loss           | 0.00981     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_mean          | 21          |
| time/                   |             |
|    fps                  | 1332        |
|    iterations           | 22          |
|    time_elapsed         | 33          |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.008581851 |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.689      |
|    explained_variance   | -0.0129     |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00905    |
|    n_updates            | 210         |
|    policy_gradient_loss | -0.00887    |
|    value_loss           | 0.00952     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_mean          | 1.43        |
| time/                   |             |
|    fps                  | 1364        |
|    iterations           | 7           |
|    time_elapsed         | 10          |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.008607304 |
|    clip_fraction        | 0.113       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.03       |
|    explained_variance   | 0.875       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0011      |
|    n_updates            | 60          |
|    policy_gradient_loss | -0.0098     |
|    value_loss           | 0.00928     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_mean          | 12.1        |
| time/                   |             |
|    fps                  | 1303        |
|    iterations           | 17          |
|    time_elapsed         | 26          |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.010080358 |
|    clip_fraction        | 0.106       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.908      |
|    explained_variance   | 0.131       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0174     |
|    n_updates            | 160         |
|    policy_gradient_loss | -0.00905    |
|    value_loss           | 0.0111      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+


--- Agent 1 Testing Metrics ---
Total Profit: 15.488312469746234
Cumulative Return: 0.0015488312469746233
Win Rate: 0.7112676056338029
Profit Factor: 12.216306043546064
Sharpe Ratio: 0.5954843466198841
Sortino Ratio: 2.2673831923102146
Maximum Drawdown: 0.1709351015184315

--- Agent 2 Testing Metrics ---
Total Profit: 14.213979580063096
Cumulative Return: 0.0014213979580063097
Win Rate: 0.6809651474530831
Profit Factor: 9.341936015831147
Sharpe Ratio: 0.5775209116965766
Sortino Ratio: 2.391268667197248
Maximum Drawdown: 0.1311282970552341

--- Agent 3 Testing Metrics ---
Total Profit: 15.924829204339403
Cumulative Return: 0.0015924829204339403
Win Rate: 0.7282608695652174
Profit Factor: 14.81678924058342
Sharpe Ratio: 0.6554693496055152
Sortino Ratio: 3.2305319095771603
Maximum Drawdown: 0.08429676239265105

--- Agent 4 Testing Metrics ---
Total Profit: 16.14484375418033
Cumulative Return: 0.0016144843754180328
Win Rate: 0.720626631853786
Profit Factor: 12.46582632121593
Sharpe Ratio:

In [2]:
#multiagent DQN
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import DQN
from sklearn.preprocessing import StandardScaler

# Function to load and normalize data
def load_and_normalize_data(train_file, test_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
    df_test['timestamp'] = pd.to_datetime(df_test['timestamp'])

    scaler = StandardScaler()
    columns_to_normalize = ['open', 'high', 'low', 'close', 'volume']

    df_train[columns_to_normalize] = scaler.fit_transform(df_train[columns_to_normalize])
    df_test[columns_to_normalize] = scaler.transform(df_test[columns_to_normalize])

    return df_train, df_test, scaler

# Single-Agent Trading Environment
class SingleAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(SingleAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0 = neutral, 1 = long, -1 = short
        self.trades = []
        self.entry_price = 0
        self.scaler = scaler

        # Action space: hold (0), buy (1), sell (2)
        self.action_space = spaces.Discrete(3)

        # Observation space: Stock prices (open, high, low, close, volume)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(window_size, 5), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 0
        self.position = 0
        self.balance = self.initial_balance
        self.trades = []
        self.entry_price = 0
        return self._get_observation(), {}

    def _get_observation(self):
        return self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)

    def step(self, action):
        reward = 0
        current_price = self.data.iloc[self.current_step]['close']

        if action == 1:  # Buy
            if self.position == 0:
                self.position = 1
                self.entry_price = current_price
            elif self.position == -1:
                reward = self.entry_price - current_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        elif action == 2:  # Sell
            if self.position == 0:
                self.position = -1
                self.entry_price = current_price
            elif self.position == 1:
                reward = current_price - self.entry_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return self._get_observation(), reward, terminated, truncated, {}

# Multi-Agent Trading Environment
class MultiAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None, num_agents=4):
        super(MultiAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.initial_balance = initial_balance
        self.scaler = scaler
        self.num_agents = num_agents
        self.agents = [SingleAgentEnv(data, window_size, initial_balance, scaler) for _ in range(num_agents)]

    def reset(self):
        obs = []
        for agent in self.agents:
            agent_obs, _ = agent.reset()
            obs.append(agent_obs)
        return obs

    def step(self, actions):
        obs, rewards, terminated, truncated, infos = [], [], [], [], []
        for agent, action in zip(self.agents, actions):
            agent_obs, reward, done, truncate, info = agent.step(action)
            obs.append(agent_obs)
            rewards.append(reward)
            terminated.append(done)
            truncated.append(truncate)
            infos.append(info)
        return obs, rewards, any(terminated), any(truncated), infos

# Function to calculate metrics for each agent
def calculate_metrics(trades, initial_balance, final_balance):
    total_profit = final_balance - initial_balance
    cumulative_return = (final_balance - initial_balance) / initial_balance
    win_rate = len([trade for trade in trades if trade > 0]) / len(trades) if trades else 0
    gross_profit = sum(trade for trade in trades if trade > 0)
    gross_loss = -sum(trade for trade in trades if trade < 0)
    profit_factor = gross_profit / gross_loss if gross_loss != 0 else np.inf
    returns = np.array(trades)
    sharpe_ratio = np.mean(returns) / np.std(returns) if np.std(returns) != 0 else 0
    downside_std = np.std([min(0, r) for r in returns])
    sortino_ratio = np.mean(returns) / downside_std if downside_std != 0 else 0
    max_drawdown = np.max(np.maximum.accumulate(np.cumsum(trades)) - np.cumsum(trades)) if trades else 0

    metrics = {
        "Total Profit": total_profit,
        "Cumulative Return": cumulative_return,
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Maximum Drawdown": max_drawdown
    }
    return metrics

# Sharpe Ratio-weighted aggregation for combined metrics
def aggregate_metrics_sharpe_weighted(metrics_list):
    positive_metrics = [m for m in metrics_list if m["Sharpe Ratio"] > 0]
    total_sharpe = sum(m["Sharpe Ratio"] for m in positive_metrics)
    if total_sharpe == 0:
        return {metric: 0 for metric in metrics_list[0]}
    
    combined_metrics = {
        "Total Profit": sum(m["Total Profit"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Cumulative Return": sum(m["Cumulative Return"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Win Rate": sum(m["Win Rate"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Profit Factor": sum(m["Profit Factor"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Sharpe Ratio": sum(m["Sharpe Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Sortino Ratio": sum(m["Sortino Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Maximum Drawdown": sum(m["Maximum Drawdown"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
    }
    return combined_metrics

# Train and evaluate each agent
def train_and_evaluate():
    train_file = 'LPL_TRAINING.csv'
    test_file = 'LPL_TESTING.csv'
    df_train, df_test, scaler = load_and_normalize_data(train_file, test_file)

    training_metrics = []
    models = []

    # Train each agent independently
    for i in range(4):  # 4 agents
        env_train = SingleAgentEnv(df_train, window_size=10, scaler=scaler)
        model = DQN("MlpPolicy", env_train, verbose=1)
        model.learn(total_timesteps=50000)
        models.append(model)

        # Record training metrics
        training_metrics.append(calculate_metrics(env_train.trades, env_train.initial_balance, env_train.balance))

    combined_training_metrics = aggregate_metrics_sharpe_weighted(training_metrics)
    print("\n=== Combined Training Metrics (Sharpe Ratio Weighted) ===")
    for metric, value in combined_training_metrics.items():
        print(f"{metric}: {value}")

    # Test in the multi-agent environment
    env_test = MultiAgentEnv(df_test, window_size=10, scaler=scaler, num_agents=4)
    obs = env_test.reset()
    done = False
    while not done:
        actions = [model.predict(obs[i])[0] for i, model in enumerate(models)]
        obs, rewards, done, truncated, _ = env_test.step(actions)

    # Calculate and display testing metrics
    testing_metrics = []
    for agent in env_test.agents:
        testing_metrics.append(calculate_metrics(agent.trades, agent.initial_balance, agent.balance))

    combined_testing_metrics = aggregate_metrics_sharpe_weighted(testing_metrics)
    print("\n=== Combined Testing Metrics (Sharpe Ratio Weighted) ===")
    for metric, value in combined_testing_metrics.items():
        print(f"{metric}: {value}")

# Run the training and evaluation
train_and_evaluate()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.78e+03 |
|    ep_rew_mean      | 0.669    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1190     |
|    time_elapsed     | 16       |
|    total_timesteps  | 19112    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000353 |
|    n_updates        | 4752     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.78e+03 |
|    ep_rew_mean      | 1.19     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1129     |
|    time_elapsed     | 33       |
|    total_timesteps  | 38224    |
| train/              |        

In [3]:
#multiagent a2c
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import A2C
from sklearn.preprocessing import StandardScaler

# Function to load and normalize data
def load_and_normalize_data(train_file, test_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
    df_test['timestamp'] = pd.to_datetime(df_test['timestamp'])

    scaler = StandardScaler()
    columns_to_normalize = ['open', 'high', 'low', 'close', 'volume']

    df_train[columns_to_normalize] = scaler.fit_transform(df_train[columns_to_normalize])
    df_test[columns_to_normalize] = scaler.transform(df_test[columns_to_normalize])

    return df_train, df_test, scaler

# Single-Agent Trading Environment with Modified Reward Structure
class SingleAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(SingleAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0 = neutral, 1 = long, -1 = short
        self.trades = []
        self.entry_price = 0
        self.scaler = scaler

        # Action space: hold (0), buy (1), sell (2)
        self.action_space = spaces.Discrete(3)

        # Observation space: Normalized stock prices (open, high, low, close, volume)
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(window_size, 5), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 0
        self.position = 0
        self.balance = self.initial_balance
        self.trades = []
        self.entry_price = 0
        return self._get_observation(), {}

    def _get_observation(self):
        obs = self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)
        obs -= np.min(obs, axis=0)
        obs /= np.max(obs, axis=0) + 1e-8  # Normalizing to [0,1]
        return obs

    def step(self, action):
        reward = 0
        current_price = self.data.iloc[self.current_step]['close']

        if action == 1:  # Buy
            if self.position == 0:
                self.position = 1
                self.entry_price = current_price
            elif self.position == -1:
                reward = self.entry_price - current_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        elif action == 2:  # Sell
            if self.position == 0:
                self.position = -1
                self.entry_price = current_price
            elif self.position == 1:
                reward = current_price - self.entry_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        # Normalize reward and add transaction cost penalty
        reward = (reward / self.initial_balance) - 0.001  # Small penalty for holding a position

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return self._get_observation(), np.clip(reward, -1, 1), terminated, truncated, {}

# Multi-Agent Trading Environment
class MultiAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None, num_agents=4):
        super(MultiAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.initial_balance = initial_balance
        self.scaler = scaler
        self.num_agents = num_agents
        self.agents = [SingleAgentEnv(data, window_size, initial_balance, scaler) for _ in range(num_agents)]

    def reset(self):
        obs = []
        for agent in self.agents:
            agent_obs, _ = agent.reset()
            obs.append(agent_obs)
        return obs

    def step(self, actions):
        obs, rewards, terminated, truncated, infos = [], [], [], [], []
        for agent, action in zip(self.agents, actions):
            agent_obs, reward, done, truncate, info = agent.step(action)
            obs.append(agent_obs)
            rewards.append(reward)
            terminated.append(done)
            truncated.append(truncate)
            infos.append(info)
        return obs, rewards, any(terminated), any(truncated), infos

# Function to calculate metrics for each agent
def calculate_metrics(trades, initial_balance, final_balance):
    total_profit = final_balance - initial_balance
    cumulative_return = (final_balance - initial_balance) / initial_balance
    win_rate = len([trade for trade in trades if trade > 0]) / len(trades) if trades else 0
    gross_profit = sum(trade for trade in trades if trade > 0)
    gross_loss = -sum(trade for trade in trades if trade < 0)
    profit_factor = gross_profit / gross_loss if gross_loss != 0 else np.inf
    returns = np.array(trades)
    sharpe_ratio = np.mean(returns) / np.std(returns) if np.std(returns) != 0 else 0
    downside_std = np.std([min(0, r) for r in returns])
    sortino_ratio = np.mean(returns) / downside_std if downside_std != 0 else 0
    max_drawdown = np.max(np.maximum.accumulate(np.cumsum(trades)) - np.cumsum(trades)) if trades else 0

    metrics = {
        "Total Profit": total_profit,
        "Cumulative Return": cumulative_return,
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Maximum Drawdown": max_drawdown
    }
    return metrics

# Sharpe Ratio-weighted aggregation for combined metrics
def aggregate_metrics_sharpe_weighted(metrics_list):
    positive_metrics = [m for m in metrics_list if m["Sharpe Ratio"] > 0]
    total_sharpe = sum(m["Sharpe Ratio"] for m in positive_metrics)
    if total_sharpe == 0:
        return {metric: 0 for metric in metrics_list[0]}
    
    combined_metrics = {
        "Total Profit": sum(m["Total Profit"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Cumulative Return": sum(m["Cumulative Return"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Win Rate": sum(m["Win Rate"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Profit Factor": sum(m["Profit Factor"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Sharpe Ratio": sum(m["Sharpe Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Sortino Ratio": sum(m["Sortino Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Maximum Drawdown": sum(m["Maximum Drawdown"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
    }
    return combined_metrics

# Train and evaluate each agent
def train_and_evaluate():
    train_file = 'LPL_TRAINING.csv'
    test_file = 'LPL_TESTING.csv'
    df_train, df_test, scaler = load_and_normalize_data(train_file, test_file)

    training_metrics = []
    models = []

    # Train each agent independently
    for i in range(4):  # 4 agents
        env_train = SingleAgentEnv(df_train, window_size=10, scaler=scaler)
        model = A2C("MlpPolicy", env_train, verbose=1)
        model.learn(total_timesteps=100000)  # Increased timesteps
        models.append(model)

        # Record training metrics
        training_metrics.append(calculate_metrics(env_train.trades, env_train.initial_balance, env_train.balance))

    combined_training_metrics = aggregate_metrics_sharpe_weighted(training_metrics)
    print("\n=== Combined Training Metrics (Sharpe Ratio Weighted) ===")
    for metric, value in combined_training_metrics.items():
        print(f"{metric}: {value}")

    # Test in the multi-agent environment
    env_test = MultiAgentEnv(df_test, window_size=10, scaler=scaler, num_agents=4)
    obs = env_test.reset()
    done = False
    while not done:
        actions = [model.predict(obs[i])[0] for i, model in enumerate(models)]
        obs, rewards, done, truncated, _ = env_test.step(actions)

    # Calculate and display testing metrics
    testing_metrics = []
    for agent in env_test.agents:
        testing_metrics.append(calculate_metrics(agent.trades, agent.initial_balance, agent.balance))

    combined_testing_metrics = aggregate_metrics_sharpe_weighted(testing_metrics)
    print("\n=== Combined Testing Metrics (Sharpe Ratio Weighted) ===")
    for metric, value in combined_testing_metrics.items():
        print(f"{metric}: {value}")

# Run the training and evaluation
train_and_evaluate()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
------------------------------------
| time/                 |          |
|    fps                | 904      |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -1.09    |
|    explained_variance | -148     |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.0464  |
|    value_loss         | 0.00573  |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 954       |
|    iterations         | 200       |
|    time_elapsed       | 1         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -1.08     |
|    explained_variance | -1.71e+03 |
|    learning_rate      | 0.0007    |
|    n_u

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 998       |
|    iterations         | 1600      |
|    time_elapsed       | 8         |
|    total_timesteps    | 8000      |
| train/                |           |
|    entropy_loss       | -1.01     |
|    explained_variance | -2.17e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 1599      |
|    policy_loss        | -0.0127   |
|    value_loss         | 0.000286  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 999      |
|    iterations         | 1700     |
|    time_elapsed       | 8        |
|    total_timesteps    | 8500     |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 952      |
|    iterations         | 2900     |
|    time_elapsed       | 15       |
|    total_timesteps    | 14500    |
| train/                |          |
|    entropy_loss       | -0.908   |
|    explained_variance | -20      |
|    learning_rate      | 0.0007   |
|    n_updates          | 2899     |
|    policy_loss        | 0.000445 |
|    value_loss         | 1.38e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 943      |
|    iterations         | 3000     |
|    time_elapsed       | 15       |
|    total_timesteps    | 15000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 942      |
|    iterations         | 4200     |
|    time_elapsed       | 22       |
|    total_timesteps    | 21000    |
| train/                |          |
|    entropy_loss       | -1.05    |
|    explained_variance | -24.3    |
|    learning_rate      | 0.0007   |
|    n_updates          | 4199     |
|    policy_loss        | 0.00112  |
|    value_loss         | 1.83e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 942      |
|    iterations         | 4300     |
|    time_elapsed       | 22       |
|    total_timesteps    | 21500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 921      |
|    iterations         | 5500     |
|    time_elapsed       | 29       |
|    total_timesteps    | 27500    |
| train/                |          |
|    entropy_loss       | -0.988   |
|    explained_variance | -8.04    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5499     |
|    policy_loss        | 0.000163 |
|    value_loss         | 2.33e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 922      |
|    iterations         | 5600     |
|    time_elapsed       | 30       |
|    total_timesteps    | 28000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 924       |
|    iterations         | 6800      |
|    time_elapsed       | 36        |
|    total_timesteps    | 34000     |
| train/                |           |
|    entropy_loss       | -0.288    |
|    explained_variance | -782      |
|    learning_rate      | 0.0007    |
|    n_updates          | 6799      |
|    policy_loss        | -0.000291 |
|    value_loss         | 2.47e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 922      |
|    iterations         | 6900     |
|    time_elapsed       | 37       |
|    total_timesteps    | 34500    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 883      |
|    iterations         | 8100     |
|    time_elapsed       | 45       |
|    total_timesteps    | 40500    |
| train/                |          |
|    entropy_loss       | -0.0818  |
|    explained_variance | -30.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8099     |
|    policy_loss        | 6.39e-06 |
|    value_loss         | 3.51e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 884       |
|    iterations         | 8200      |
|    time_elapsed       | 46        |
|    total_timesteps    | 41000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 885      |
|    iterations         | 9400     |
|    time_elapsed       | 53       |
|    total_timesteps    | 47000    |
| train/                |          |
|    entropy_loss       | -0.00692 |
|    explained_variance | -1.66    |
|    learning_rate      | 0.0007   |
|    n_updates          | 9399     |
|    policy_loss        | 2.63e-07 |
|    value_loss         | 3.52e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 885       |
|    iterations         | 9500      |
|    time_elapsed       | 53        |
|    total_timesteps    | 47500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 883      |
|    iterations         | 10700    |
|    time_elapsed       | 60       |
|    total_timesteps    | 53500    |
| train/                |          |
|    entropy_loss       | -0.00511 |
|    explained_variance | -12.1    |
|    learning_rate      | 0.0007   |
|    n_updates          | 10699    |
|    policy_loss        | 5.99e-07 |
|    value_loss         | 1.69e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 883      |
|    iterations         | 10800    |
|    time_elapsed       | 61       |
|    total_timesteps    | 54000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 867      |
|    iterations         | 12000    |
|    time_elapsed       | 69       |
|    total_timesteps    | 60000    |
| train/                |          |
|    entropy_loss       | -0.00305 |
|    explained_variance | -911     |
|    learning_rate      | 0.0007   |
|    n_updates          | 11999    |
|    policy_loss        | 1.05e-08 |
|    value_loss         | 9.7e-08  |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 865       |
|    iterations         | 12100     |
|    time_elapsed       | 69        |
|    total_timesteps    | 60500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 870      |
|    iterations         | 13300    |
|    time_elapsed       | 76       |
|    total_timesteps    | 66500    |
| train/                |          |
|    entropy_loss       | -0.0494  |
|    explained_variance | -9.92    |
|    learning_rate      | 0.0007   |
|    n_updates          | 13299    |
|    policy_loss        | -3.7e-06 |
|    value_loss         | 4.48e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 871      |
|    iterations         | 13400    |
|    time_elapsed       | 76       |
|    total_timesteps    | 67000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 866       |
|    iterations         | 14600     |
|    time_elapsed       | 84        |
|    total_timesteps    | 73000     |
| train/                |           |
|    entropy_loss       | -0.0391   |
|    explained_variance | -9.74     |
|    learning_rate      | 0.0007    |
|    n_updates          | 14599     |
|    policy_loss        | -1.72e-06 |
|    value_loss         | 5.33e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 862      |
|    iterations         | 14700    |
|    time_elapsed       | 85       |
|    total_timesteps    | 73500    |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 858       |
|    iterations         | 15900     |
|    time_elapsed       | 92        |
|    total_timesteps    | 79500     |
| train/                |           |
|    entropy_loss       | -0.761    |
|    explained_variance | -9.69     |
|    learning_rate      | 0.0007    |
|    n_updates          | 15899     |
|    policy_loss        | -0.000717 |
|    value_loss         | 3.98e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 858      |
|    iterations         | 16000    |
|    time_elapsed       | 93       |
|    total_timesteps    | 80000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 856      |
|    iterations         | 17200    |
|    time_elapsed       | 100      |
|    total_timesteps    | 86000    |
| train/                |          |
|    entropy_loss       | -0.196   |
|    explained_variance | -11.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 17199    |
|    policy_loss        | 2.49e-05 |
|    value_loss         | 3.56e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 855       |
|    iterations         | 17300     |
|    time_elapsed       | 101       |
|    total_timesteps    | 86500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 850      |
|    iterations         | 18500    |
|    time_elapsed       | 108      |
|    total_timesteps    | 92500    |
| train/                |          |
|    entropy_loss       | -0.167   |
|    explained_variance | -42.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 18499    |
|    policy_loss        | 0.000173 |
|    value_loss         | 2.98e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 848      |
|    iterations         | 18600    |
|    time_elapsed       | 109      |
|    total_timesteps    | 93000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 856      |
|    iterations         | 19800    |
|    time_elapsed       | 115      |
|    total_timesteps    | 99000    |
| train/                |          |
|    entropy_loss       | -0.733   |
|    explained_variance | -0.697   |
|    learning_rate      | 0.0007   |
|    n_updates          | 19799    |
|    policy_loss        | -0.00229 |
|    value_loss         | 5.87e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 856       |
|    iterations         | 19900     |
|    time_elapsed       | 116       |
|    total_timesteps    | 99500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 925      |
|    iterations         | 1300     |
|    time_elapsed       | 7        |
|    total_timesteps    | 6500     |
| train/                |          |
|    entropy_loss       | -0.938   |
|    explained_variance | -400     |
|    learning_rate      | 0.0007   |
|    n_updates          | 1299     |
|    policy_loss        | 0.00362  |
|    value_loss         | 0.000225 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 930      |
|    iterations         | 1400     |
|    time_elapsed       | 7        |
|    total_timesteps    | 7000     |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 898      |
|    iterations         | 2600     |
|    time_elapsed       | 14       |
|    total_timesteps    | 13000    |
| train/                |          |
|    entropy_loss       | -0.567   |
|    explained_variance | -246     |
|    learning_rate      | 0.0007   |
|    n_updates          | 2599     |
|    policy_loss        | 0.00239  |
|    value_loss         | 1.28e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 899      |
|    iterations         | 2700     |
|    time_elapsed       | 15       |
|    total_timesteps    | 13500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 912      |
|    iterations         | 3900     |
|    time_elapsed       | 21       |
|    total_timesteps    | 19500    |
| train/                |          |
|    entropy_loss       | -0.577   |
|    explained_variance | -99.7    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3899     |
|    policy_loss        | 0.00154  |
|    value_loss         | 4.54e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 905       |
|    iterations         | 4000      |
|    time_elapsed       | 22        |
|    total_timesteps    | 20000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 870      |
|    iterations         | 5200     |
|    time_elapsed       | 29       |
|    total_timesteps    | 26000    |
| train/                |          |
|    entropy_loss       | -0.885   |
|    explained_variance | -219     |
|    learning_rate      | 0.0007   |
|    n_updates          | 5199     |
|    policy_loss        | 0.000523 |
|    value_loss         | 4.01e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 863      |
|    iterations         | 5300     |
|    time_elapsed       | 30       |
|    total_timesteps    | 26500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 867      |
|    iterations         | 6500     |
|    time_elapsed       | 37       |
|    total_timesteps    | 32500    |
| train/                |          |
|    entropy_loss       | -1.02    |
|    explained_variance | -12.5    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6499     |
|    policy_loss        | 0.00094  |
|    value_loss         | 1.62e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 869      |
|    iterations         | 6600     |
|    time_elapsed       | 37       |
|    total_timesteps    | 33000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 833       |
|    iterations         | 7800      |
|    time_elapsed       | 46        |
|    total_timesteps    | 39000     |
| train/                |           |
|    entropy_loss       | -0.147    |
|    explained_variance | -52.5     |
|    learning_rate      | 0.0007    |
|    n_updates          | 7799      |
|    policy_loss        | -0.000102 |
|    value_loss         | 2.33e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 835      |
|    iterations         | 7900     |
|    time_elapsed       | 47       |
|    total_timesteps    | 39500    |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 824       |
|    iterations         | 9100      |
|    time_elapsed       | 55        |
|    total_timesteps    | 45500     |
| train/                |           |
|    entropy_loss       | -0.0224   |
|    explained_variance | -0.217    |
|    learning_rate      | 0.0007    |
|    n_updates          | 9099      |
|    policy_loss        | -7.88e-07 |
|    value_loss         | 1.93e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 825      |
|    iterations         | 9200     |
|    time_elapsed       | 55       |
|    total_timesteps    | 46000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 844      |
|    iterations         | 10400    |
|    time_elapsed       | 61       |
|    total_timesteps    | 52000    |
| train/                |          |
|    entropy_loss       | -0.00363 |
|    explained_variance | -4.41    |
|    learning_rate      | 0.0007   |
|    n_updates          | 10399    |
|    policy_loss        | 5.57e-07 |
|    value_loss         | 2.74e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 845      |
|    iterations         | 10500    |
|    time_elapsed       | 62       |
|    total_timesteps    | 52500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 856       |
|    iterations         | 11700     |
|    time_elapsed       | 68        |
|    total_timesteps    | 58500     |
| train/                |           |
|    entropy_loss       | -0.0016   |
|    explained_variance | -61.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 11699     |
|    policy_loss        | -2.98e-07 |
|    value_loss         | 4.72e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 854       |
|    iterations         | 11800     |
|    time_elapsed       | 69        |
|    total_timesteps    | 59000     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 850      |
|    iterations         | 13000    |
|    time_elapsed       | 76       |
|    total_timesteps    | 65000    |
| train/                |          |
|    entropy_loss       | -0.0025  |
|    explained_variance | -427     |
|    learning_rate      | 0.0007   |
|    n_updates          | 12999    |
|    policy_loss        | 4.32e-07 |
|    value_loss         | 7.43e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 851      |
|    iterations         | 13100    |
|    time_elapsed       | 76       |
|    total_timesteps    | 65500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 847       |
|    iterations         | 14300     |
|    time_elapsed       | 84        |
|    total_timesteps    | 71500     |
| train/                |           |
|    entropy_loss       | -0.0022   |
|    explained_variance | -5.54     |
|    learning_rate      | 0.0007    |
|    n_updates          | 14299     |
|    policy_loss        | -1.68e-07 |
|    value_loss         | 1.84e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 846      |
|    iterations         | 14400    |
|    time_elapsed       | 85       |
|    total_timesteps    | 72000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 837      |
|    iterations         | 15600    |
|    time_elapsed       | 93       |
|    total_timesteps    | 78000    |
| train/                |          |
|    entropy_loss       | -0.00203 |
|    explained_variance | -41.3    |
|    learning_rate      | 0.0007   |
|    n_updates          | 15599    |
|    policy_loss        | 1.16e-07 |
|    value_loss         | 1.44e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 838       |
|    iterations         | 15700     |
|    time_elapsed       | 93        |
|    total_timesteps    | 78500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 841      |
|    iterations         | 16900    |
|    time_elapsed       | 100      |
|    total_timesteps    | 84500    |
| train/                |          |
|    entropy_loss       | -0.00153 |
|    explained_variance | -38.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 16899    |
|    policy_loss        | 8.66e-09 |
|    value_loss         | 2.36e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 841       |
|    iterations         | 17000     |
|    time_elapsed       | 101       |
|    total_timesteps    | 85000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 835      |
|    iterations         | 18200    |
|    time_elapsed       | 108      |
|    total_timesteps    | 91000    |
| train/                |          |
|    entropy_loss       | -0.00153 |
|    explained_variance | -20.8    |
|    learning_rate      | 0.0007   |
|    n_updates          | 18199    |
|    policy_loss        | 1.32e-06 |
|    value_loss         | 8.27e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 835       |
|    iterations         | 18300     |
|    time_elapsed       | 109       |
|    total_timesteps    | 91500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 836      |
|    iterations         | 19500    |
|    time_elapsed       | 116      |
|    total_timesteps    | 97500    |
| train/                |          |
|    entropy_loss       | -0.00109 |
|    explained_variance | -10.8    |
|    learning_rate      | 0.0007   |
|    n_updates          | 19499    |
|    policy_loss        | 2.77e-08 |
|    value_loss         | 1.29e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 836       |
|    iterations         | 19600     |
|    time_elapsed       | 117       |
|    total_timesteps    | 98000     |
| train/                |    

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


------------------------------------
| time/                 |          |
|    fps                | 495      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.573   |
|    explained_variance | -136     |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.158   |
|    value_loss         | 0.025    |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 527      |
|    iterations         | 200      |
|    time_elapsed       | 1        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.395   |
|    explained_variance | -771     |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | -0.0312  |
|    value_loss         | 0.00622  |
-

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 743       |
|    iterations         | 1600      |
|    time_elapsed       | 10        |
|    total_timesteps    | 8000      |
| train/                |           |
|    entropy_loss       | -0.00524  |
|    explained_variance | -43.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 1599      |
|    policy_loss        | -9.24e-06 |
|    value_loss         | 0.000339  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 746      |
|    iterations         | 1700     |
|    time_elapsed       | 11       |
|    total_timesteps    | 8500     |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 787       |
|    iterations         | 2900      |
|    time_elapsed       | 18        |
|    total_timesteps    | 14500     |
| train/                |           |
|    entropy_loss       | -0.00436  |
|    explained_variance | -285      |
|    learning_rate      | 0.0007    |
|    n_updates          | 2899      |
|    policy_loss        | -2.06e-06 |
|    value_loss         | 2.6e-05   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 786       |
|    iterations         | 3000      |
|    time_elapsed       | 19        |
|    total_timesteps    | 15000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 785       |
|    iterations         | 4200      |
|    time_elapsed       | 26        |
|    total_timesteps    | 21000     |
| train/                |           |
|    entropy_loss       | -0.0607   |
|    explained_variance | -18.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 4199      |
|    policy_loss        | -7.27e-06 |
|    value_loss         | 3.45e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 788       |
|    iterations         | 4300      |
|    time_elapsed       | 27        |
|    total_timesteps    | 21500     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 797      |
|    iterations         | 5500     |
|    time_elapsed       | 34       |
|    total_timesteps    | 27500    |
| train/                |          |
|    entropy_loss       | -0.609   |
|    explained_variance | -7.8     |
|    learning_rate      | 0.0007   |
|    n_updates          | 5499     |
|    policy_loss        | 0.00179  |
|    value_loss         | 5.25e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 797       |
|    iterations         | 5600      |
|    time_elapsed       | 35        |
|    total_timesteps    | 28000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 791      |
|    iterations         | 6800     |
|    time_elapsed       | 42       |
|    total_timesteps    | 34000    |
| train/                |          |
|    entropy_loss       | -0.0472  |
|    explained_variance | -273     |
|    learning_rate      | 0.0007   |
|    n_updates          | 6799     |
|    policy_loss        | 5.44e-06 |
|    value_loss         | 5.83e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 793       |
|    iterations         | 6900      |
|    time_elapsed       | 43        |
|    total_timesteps    | 34500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 813      |
|    iterations         | 8100     |
|    time_elapsed       | 49       |
|    total_timesteps    | 40500    |
| train/                |          |
|    entropy_loss       | -0.023   |
|    explained_variance | -16.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8099     |
|    policy_loss        | -2.7e-06 |
|    value_loss         | 2.21e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 815      |
|    iterations         | 8200     |
|    time_elapsed       | 50       |
|    total_timesteps    | 41000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 834       |
|    iterations         | 9400      |
|    time_elapsed       | 56        |
|    total_timesteps    | 47000     |
| train/                |           |
|    entropy_loss       | -0.00609  |
|    explained_variance | -4.02     |
|    learning_rate      | 0.0007    |
|    n_updates          | 9399      |
|    policy_loss        | -6.21e-07 |
|    value_loss         | 1.15e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 835       |
|    iterations         | 9500      |
|    time_elapsed       | 56        |
|    total_timesteps    | 47500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 848       |
|    iterations         | 10700     |
|    time_elapsed       | 63        |
|    total_timesteps    | 53500     |
| train/                |           |
|    entropy_loss       | -0.00696  |
|    explained_variance | -715      |
|    learning_rate      | 0.0007    |
|    n_updates          | 10699     |
|    policy_loss        | -7.85e-07 |
|    value_loss         | 1.29e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 849       |
|    iterations         | 10800     |
|    time_elapsed       | 63        |
|    total_timesteps    | 54000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 851       |
|    iterations         | 12000     |
|    time_elapsed       | 70        |
|    total_timesteps    | 60000     |
| train/                |           |
|    entropy_loss       | -0.00209  |
|    explained_variance | -4.8      |
|    learning_rate      | 0.0007    |
|    n_updates          | 11999     |
|    policy_loss        | -2.67e-07 |
|    value_loss         | 1.67e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 849       |
|    iterations         | 12100     |
|    time_elapsed       | 71        |
|    total_timesteps    | 60500     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 857      |
|    iterations         | 13300    |
|    time_elapsed       | 77       |
|    total_timesteps    | 66500    |
| train/                |          |
|    entropy_loss       | -0.00566 |
|    explained_variance | -52.3    |
|    learning_rate      | 0.0007   |
|    n_updates          | 13299    |
|    policy_loss        | 4.23e-07 |
|    value_loss         | 5.67e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 858       |
|    iterations         | 13400     |
|    time_elapsed       | 78        |
|    total_timesteps    | 67000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 867      |
|    iterations         | 14600    |
|    time_elapsed       | 84       |
|    total_timesteps    | 73000    |
| train/                |          |
|    entropy_loss       | -0.00479 |
|    explained_variance | -9.9     |
|    learning_rate      | 0.0007   |
|    n_updates          | 14599    |
|    policy_loss        | 7.11e-07 |
|    value_loss         | 3.3e-06  |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 867       |
|    iterations         | 14700     |
|    time_elapsed       | 84        |
|    total_timesteps    | 73500     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 866       |
|    iterations         | 15900     |
|    time_elapsed       | 91        |
|    total_timesteps    | 79500     |
| train/                |           |
|    entropy_loss       | -0.0093   |
|    explained_variance | -70.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 15899     |
|    policy_loss        | -3.43e-06 |
|    value_loss         | 1.15e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 866      |
|    iterations         | 16000    |
|    time_elapsed       | 92       |
|    total_timesteps    | 80000    |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 864       |
|    iterations         | 17200     |
|    time_elapsed       | 99        |
|    total_timesteps    | 86000     |
| train/                |           |
|    entropy_loss       | -0.00303  |
|    explained_variance | -1.31e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 17199     |
|    policy_loss        | -5.93e-08 |
|    value_loss         | 1.67e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 864       |
|    iterations         | 17300     |
|    time_elapsed       | 100       |
|    total_timesteps    | 86500     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 861      |
|    iterations         | 18500    |
|    time_elapsed       | 107      |
|    total_timesteps    | 92500    |
| train/                |          |
|    entropy_loss       | -0.00291 |
|    explained_variance | -0.896   |
|    learning_rate      | 0.0007   |
|    n_updates          | 18499    |
|    policy_loss        | 1.48e-07 |
|    value_loss         | 2.83e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 861       |
|    iterations         | 18600     |
|    time_elapsed       | 107       |
|    total_timesteps    | 93000     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 865       |
|    iterations         | 19800     |
|    time_elapsed       | 114       |
|    total_timesteps    | 99000     |
| train/                |           |
|    entropy_loss       | -0.00475  |
|    explained_variance | -3.79     |
|    learning_rate      | 0.0007    |
|    n_updates          | 19799     |
|    policy_loss        | -8.86e-07 |
|    value_loss         | 4.04e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 866      |
|    iterations         | 19900    |
|    time_elapsed       | 114      |
|    total_timesteps    | 99500    |
| train/             

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


-------------------------------------
| time/                 |           |
|    fps                | 628       |
|    iterations         | 100       |
|    time_elapsed       | 0         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -1.09     |
|    explained_variance | -2.76e+04 |
|    learning_rate      | 0.0007    |
|    n_updates          | 99        |
|    policy_loss        | -0.0908   |
|    value_loss         | 0.0105    |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 595       |
|    iterations         | 200       |
|    time_elapsed       | 1         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -1.1      |
|    explained_variance | -1.15e+04 |
|    learning_rate      | 0.0007    |
|    n_updates          | 199       |
|    policy_loss        | -0.0583   |
|    value_l

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 802      |
|    iterations         | 1600     |
|    time_elapsed       | 9        |
|    total_timesteps    | 8000     |
| train/                |          |
|    entropy_loss       | -0.899   |
|    explained_variance | -242     |
|    learning_rate      | 0.0007   |
|    n_updates          | 1599     |
|    policy_loss        | 0.00466  |
|    value_loss         | 0.000307 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 809      |
|    iterations         | 1700     |
|    time_elapsed       | 10       |
|    total_timesteps    | 8500     |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 811       |
|    iterations         | 2900      |
|    time_elapsed       | 17        |
|    total_timesteps    | 14500     |
| train/                |           |
|    entropy_loss       | -1.07     |
|    explained_variance | -7.49e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 2899      |
|    policy_loss        | -0.00612  |
|    value_loss         | 5.62e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 798      |
|    iterations         | 3000     |
|    time_elapsed       | 18       |
|    total_timesteps    | 15000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 789      |
|    iterations         | 4200     |
|    time_elapsed       | 26       |
|    total_timesteps    | 21000    |
| train/                |          |
|    entropy_loss       | -1.05    |
|    explained_variance | -73.5    |
|    learning_rate      | 0.0007   |
|    n_updates          | 4199     |
|    policy_loss        | 0.000417 |
|    value_loss         | 1.4e-06  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 792      |
|    iterations         | 4300     |
|    time_elapsed       | 27       |
|    total_timesteps    | 21500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 814       |
|    iterations         | 5500      |
|    time_elapsed       | 33        |
|    total_timesteps    | 27500     |
| train/                |           |
|    entropy_loss       | -0.0563   |
|    explained_variance | -98.7     |
|    learning_rate      | 0.0007    |
|    n_updates          | 5499      |
|    policy_loss        | -7.84e-06 |
|    value_loss         | 8.64e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 815      |
|    iterations         | 5600     |
|    time_elapsed       | 34       |
|    total_timesteps    | 28000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 803      |
|    iterations         | 6800     |
|    time_elapsed       | 42       |
|    total_timesteps    | 34000    |
| train/                |          |
|    entropy_loss       | -0.342   |
|    explained_variance | -92      |
|    learning_rate      | 0.0007   |
|    n_updates          | 6799     |
|    policy_loss        | 9.95e-05 |
|    value_loss         | 2.75e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 803      |
|    iterations         | 6900     |
|    time_elapsed       | 42       |
|    total_timesteps    | 34500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 813      |
|    iterations         | 8100     |
|    time_elapsed       | 49       |
|    total_timesteps    | 40500    |
| train/                |          |
|    entropy_loss       | -0.329   |
|    explained_variance | -15.1    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8099     |
|    policy_loss        | 7.26e-05 |
|    value_loss         | 1.14e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 807      |
|    iterations         | 8200     |
|    time_elapsed       | 50       |
|    total_timesteps    | 41000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 799      |
|    iterations         | 9400     |
|    time_elapsed       | 58       |
|    total_timesteps    | 47000    |
| train/                |          |
|    entropy_loss       | -0.0742  |
|    explained_variance | -24.1    |
|    learning_rate      | 0.0007   |
|    n_updates          | 9399     |
|    policy_loss        | 7.64e-06 |
|    value_loss         | 3.56e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 800       |
|    iterations         | 9500      |
|    time_elapsed       | 59        |
|    total_timesteps    | 47500     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 806       |
|    iterations         | 10700     |
|    time_elapsed       | 66        |
|    total_timesteps    | 53500     |
| train/                |           |
|    entropy_loss       | -0.015    |
|    explained_variance | -105      |
|    learning_rate      | 0.0007    |
|    n_updates          | 10699     |
|    policy_loss        | -4.83e-06 |
|    value_loss         | 8.94e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 808      |
|    iterations         | 10800    |
|    time_elapsed       | 66       |
|    total_timesteps    | 54000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 802      |
|    iterations         | 12000    |
|    time_elapsed       | 74       |
|    total_timesteps    | 60000    |
| train/                |          |
|    entropy_loss       | -0.0304  |
|    explained_variance | -4.55    |
|    learning_rate      | 0.0007   |
|    n_updates          | 11999    |
|    policy_loss        | 2.52e-06 |
|    value_loss         | 5.67e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 803       |
|    iterations         | 12100     |
|    time_elapsed       | 75        |
|    total_timesteps    | 60500     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 810       |
|    iterations         | 13300     |
|    time_elapsed       | 82        |
|    total_timesteps    | 66500     |
| train/                |           |
|    entropy_loss       | -0.0134   |
|    explained_variance | -9.91     |
|    learning_rate      | 0.0007    |
|    n_updates          | 13299     |
|    policy_loss        | -5.77e-07 |
|    value_loss         | 1.68e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 811      |
|    iterations         | 13400    |
|    time_elapsed       | 82       |
|    total_timesteps    | 67000    |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 815       |
|    iterations         | 14600     |
|    time_elapsed       | 89        |
|    total_timesteps    | 73000     |
| train/                |           |
|    entropy_loss       | -0.0355   |
|    explained_variance | -39.4     |
|    learning_rate      | 0.0007    |
|    n_updates          | 14599     |
|    policy_loss        | -9.21e-06 |
|    value_loss         | 4.86e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 816      |
|    iterations         | 14700    |
|    time_elapsed       | 90       |
|    total_timesteps    | 73500    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 819      |
|    iterations         | 15900    |
|    time_elapsed       | 97       |
|    total_timesteps    | 79500    |
| train/                |          |
|    entropy_loss       | -0.347   |
|    explained_variance | -35.6    |
|    learning_rate      | 0.0007   |
|    n_updates          | 15899    |
|    policy_loss        | 0.000139 |
|    value_loss         | 7.38e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 819      |
|    iterations         | 16000    |
|    time_elapsed       | 97       |
|    total_timesteps    | 80000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 824      |
|    iterations         | 17200    |
|    time_elapsed       | 104      |
|    total_timesteps    | 86000    |
| train/                |          |
|    entropy_loss       | -0.0622  |
|    explained_variance | -83.2    |
|    learning_rate      | 0.0007   |
|    n_updates          | 17199    |
|    policy_loss        | 9.67e-06 |
|    value_loss         | 9.27e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 825      |
|    iterations         | 17300    |
|    time_elapsed       | 104      |
|    total_timesteps    | 86500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 835      |
|    iterations         | 18500    |
|    time_elapsed       | 110      |
|    total_timesteps    | 92500    |
| train/                |          |
|    entropy_loss       | -0.0113  |
|    explained_variance | -111     |
|    learning_rate      | 0.0007   |
|    n_updates          | 18499    |
|    policy_loss        | 3.38e-07 |
|    value_loss         | 4.03e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 835       |
|    iterations         | 18600     |
|    time_elapsed       | 111       |
|    total_timesteps    | 93000     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -4.78     |
| time/                 |           |
|    fps                | 844       |
|    iterations         | 19800     |
|    time_elapsed       | 117       |
|    total_timesteps    | 99000     |
| train/                |           |
|    entropy_loss       | -0.0131   |
|    explained_variance | -6.39     |
|    learning_rate      | 0.0007    |
|    n_updates          | 19799     |
|    policy_loss        | -2.75e-06 |
|    value_loss         | 2.37e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -4.78    |
| time/                 |          |
|    fps                | 845      |
|    iterations         | 19900    |
|    time_elapsed       | 117      |
|    total_timesteps    | 99500    |
| train/             

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [4]:
#multiagent ensemble
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO, DQN, A2C
from sklearn.preprocessing import StandardScaler
from collections import Counter

# Function to load and normalize data
def load_and_normalize_data(train_file, test_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
    df_test['timestamp'] = pd.to_datetime(df_test['timestamp'])

    scaler = StandardScaler()
    columns_to_normalize = ['open', 'high', 'low', 'close', 'volume']

    df_train[columns_to_normalize] = scaler.fit_transform(df_train[columns_to_normalize])
    df_test[columns_to_normalize] = scaler.transform(df_test[columns_to_normalize])

    return df_train, df_test, scaler

# Single-Agent Trading Environment
class SingleAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(SingleAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0 = neutral, 1 = long, -1 = short
        self.trades = []
        self.entry_price = 0
        self.scaler = scaler

        # Action space: hold (0), buy (1), sell (2)
        self.action_space = spaces.Discrete(3)

        # Observation space: Stock prices (open, high, low, close, volume)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(window_size, 5), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 0
        self.position = 0
        self.balance = self.initial_balance
        self.trades = []
        self.entry_price = 0
        return self._get_observation(), {}

    def _get_observation(self):
        return self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)

    def step(self, action):
        reward = 0
        current_price = self.data.iloc[self.current_step]['close']

        if action == 1:  # Buy
            if self.position == 0:
                self.position = 1
                self.entry_price = current_price
            elif self.position == -1:
                reward = self.entry_price - current_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        elif action == 2:  # Sell
            if self.position == 0:
                self.position = -1
                self.entry_price = current_price
            elif self.position == 1:
                reward = current_price - self.entry_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return self._get_observation(), reward, terminated, truncated, {}

# Multi-Agent Trading Environment
class MultiAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None, num_agents=4):
        super(MultiAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.initial_balance = initial_balance
        self.scaler = scaler
        self.num_agents = num_agents
        self.agents = [SingleAgentEnv(data, window_size, initial_balance, scaler) for _ in range(num_agents)]

    def reset(self):
        obs = []
        for agent in self.agents:
            agent_obs, _ = agent.reset()
            obs.append(agent_obs)
        return obs

    def step(self, actions):
        obs, rewards, terminated, truncated, infos = [], [], [], [], []
        for agent, action in zip(self.agents, actions):
            agent_obs, reward, done, truncate, info = agent.step(action)
            obs.append(agent_obs)
            rewards.append(reward)
            terminated.append(done)
            truncated.append(truncate)
            infos.append(info)
        return obs, rewards, any(terminated), any(truncated), infos

# Ensemble model function
def ensemble_predict(actions):
    actions = [int(action) for action in actions]
    action_counts = Counter(actions)
    return action_counts.most_common(1)[0][0]

# Function to calculate metrics for each agent
def calculate_metrics(trades, initial_balance, final_balance):
    total_profit = final_balance - initial_balance
    cumulative_return = (final_balance - initial_balance) / initial_balance
    win_rate = len([trade for trade in trades if trade > 0]) / len(trades) if trades else 0
    gross_profit = sum(trade for trade in trades if trade > 0)
    gross_loss = -sum(trade for trade in trades if trade < 0)
    profit_factor = gross_profit / gross_loss if gross_loss != 0 else np.inf
    returns = np.array(trades)
    sharpe_ratio = np.mean(returns) / np.std(returns) if np.std(returns) != 0 else 0
    downside_std = np.std([min(0, r) for r in returns])
    sortino_ratio = np.mean(returns) / downside_std if downside_std != 0 else 0
    max_drawdown = np.max(np.maximum.accumulate(np.cumsum(trades)) - np.cumsum(trades)) if trades else 0

    metrics = {
        "Total Profit": total_profit,
        "Cumulative Return": cumulative_return,
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Maximum Drawdown": max_drawdown
    }
    return metrics

# Sharpe Ratio-weighted aggregation for combined metrics
def aggregate_metrics_sharpe_weighted(metrics_list):
    positive_metrics = [m for m in metrics_list if m["Sharpe Ratio"] > 0]
    total_sharpe = sum(m["Sharpe Ratio"] for m in positive_metrics)
    if total_sharpe == 0:
        return {metric: 0 for metric in metrics_list[0]}
    
    combined_metrics = {
        "Total Profit": sum(m["Total Profit"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Cumulative Return": sum(m["Cumulative Return"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Win Rate": sum(m["Win Rate"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Profit Factor": sum(m["Profit Factor"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Sharpe Ratio": sum(m["Sharpe Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Sortino Ratio": sum(m["Sortino Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Maximum Drawdown": sum(m["Maximum Drawdown"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
    }
    return combined_metrics

# Train and evaluate each agent
def train_and_evaluate():
    train_file = 'LPL_TRAINING.csv'
    test_file = 'LPL_TESTING.csv'
    df_train, df_test, scaler = load_and_normalize_data(train_file, test_file)

    training_metrics = []
    ensemble_models = []

    # Train each agent independently
    for i in range(4):  # 4 agents
        env_train = SingleAgentEnv(df_train, window_size=10, scaler=scaler)

        # Initialize each model
        ppo_model = PPO("MlpPolicy", env_train, verbose=1)
        dqn_model = DQN("MlpPolicy", env_train, verbose=1)
        a2c_model = A2C("MlpPolicy", env_train, verbose=1)

        # Train each model
        ppo_model.learn(total_timesteps=50000)
        dqn_model.learn(total_timesteps=50000)
        a2c_model.learn(total_timesteps=50000)

        # Store trained models in a list
        ensemble_models.append((ppo_model, dqn_model, a2c_model))

        # Calculate training metrics
        training_metrics.append(calculate_metrics(env_train.trades, env_train.initial_balance, env_train.balance))

    combined_training_metrics = aggregate_metrics_sharpe_weighted(training_metrics)
    print("\n=== Combined Training Metrics (Sharpe Ratio Weighted) ===")
    for metric, value in combined_training_metrics.items():
        print(f"{metric}: {value}")

    # Test in the multi-agent environment
    env_test = MultiAgentEnv(df_test, window_size=10, scaler=scaler, num_agents=4)
    obs = env_test.reset()
    done = False
    while not done:
        actions = []
        for i, (ppo_model, dqn_model, a2c_model) in enumerate(ensemble_models):
            ppo_action, _ = ppo_model.predict(obs[i])
            dqn_action, _ = dqn_model.predict(obs[i])
            a2c_action, _ = a2c_model.predict(obs[i])
            final_action = ensemble_predict([ppo_action, dqn_action, a2c_action])
            actions.append(final_action)

        obs, rewards, done, truncated, _ = env_test.step(actions)

    # Calculate and display testing metrics
    testing_metrics = []
    for agent in env_test.agents:
        testing_metrics.append(calculate_metrics(agent.trades, agent.initial_balance, agent.balance))

    combined_testing_metrics = aggregate_metrics_sharpe_weighted(testing_metrics)
    print("\n=== Combined Testing Metrics (Sharpe Ratio Weighted) ===")
    for metric, value in combined_testing_metrics.items():
        print(f"{metric}: {value}")

# Run the training and evaluation
train_and_evaluate()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------
| time/              |      |
|    fps             | 2093 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1728        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.005869644 |
|    clip_fraction        | 0.0388      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -1.38       |
|   

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 4.78e+03  |
|    ep_rew_mean          | 7.11      |
| time/                   |           |
|    fps                  | 1425      |
|    iterations           | 11        |
|    time_elapsed         | 15        |
|    total_timesteps      | 22528     |
| train/                  |           |
|    approx_kl            | 0.0071454 |
|    clip_fraction        | 0.0847    |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.01     |
|    explained_variance   | 0.838     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0126   |
|    n_updates            | 100       |
|    policy_gradient_loss | -0.0106   |
|    value_loss           | 0.00648   |
---------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 4.78e+03     |
|    ep_rew_mean          | 10.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_mean          | 20.1        |
| time/                   |             |
|    fps                  | 1343        |
|    iterations           | 21          |
|    time_elapsed         | 32          |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.014622018 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.79       |
|    explained_variance   | 0.695       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00847    |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.00647    |
|    value_loss           | 0.00824     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+

------------------------------------
| time/                 |          |
|    fps                | 573      |
|    iterations         | 700      |
|    time_elapsed       | 6        |
|    total_timesteps    | 3500     |
| train/                |          |
|    entropy_loss       | -0.588   |
|    explained_variance | -9.8     |
|    learning_rate      | 0.0007   |
|    n_updates          | 699      |
|    policy_loss        | 0.0158   |
|    value_loss         | 0.000439 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 568      |
|    iterations         | 800      |
|    time_elapsed       | 7        |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -0.498   |
|    explained_variance | -4.1     |
|    learning_rate      | 0.0007   |
|    n_updates          | 799      |
|    policy_loss        | -0.106   |
|    value_loss         | 0.00487  |
-

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 0.162    |
| time/                 |          |
|    fps                | 616      |
|    iterations         | 2100     |
|    time_elapsed       | 17       |
|    total_timesteps    | 10500    |
| train/                |          |
|    entropy_loss       | -0.256   |
|    explained_variance | -992     |
|    learning_rate      | 0.0007   |
|    n_updates          | 2099     |
|    policy_loss        | 0.00072  |
|    value_loss         | 0.000908 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 0.162    |
| time/                 |          |
|    fps                | 616      |
|    iterations         | 2200     |
|    time_elapsed       | 17       |
|    total_timesteps    | 11000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.525   |
| time/                 |          |
|    fps                | 663      |
|    iterations         | 3400     |
|    time_elapsed       | 25       |
|    total_timesteps    | 17000    |
| train/                |          |
|    entropy_loss       | -0.582   |
|    explained_variance | -8.31    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3399     |
|    policy_loss        | -0.00304 |
|    value_loss         | 1.31e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.525   |
| time/                 |          |
|    fps                | 669      |
|    iterations         | 3500     |
|    time_elapsed       | 26       |
|    total_timesteps    | 17500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.0716  |
| time/                 |          |
|    fps                | 717      |
|    iterations         | 4700     |
|    time_elapsed       | 32       |
|    total_timesteps    | 23500    |
| train/                |          |
|    entropy_loss       | -0.911   |
|    explained_variance | -0.639   |
|    learning_rate      | 0.0007   |
|    n_updates          | 4699     |
|    policy_loss        | 0.00694  |
|    value_loss         | 0.000118 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.493   |
| time/                 |          |
|    fps                | 718      |
|    iterations         | 4800     |
|    time_elapsed       | 33       |
|    total_timesteps    | 24000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.365   |
| time/                 |          |
|    fps                | 740      |
|    iterations         | 6100     |
|    time_elapsed       | 41       |
|    total_timesteps    | 30500    |
| train/                |          |
|    entropy_loss       | -0.17    |
|    explained_variance | -48.6    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6099     |
|    policy_loss        | 0.000157 |
|    value_loss         | 2.17e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -0.365    |
| time/                 |           |
|    fps                | 743       |
|    iterations         | 6200      |
|    time_elapsed       | 41        |
|    total_timesteps    | 31000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.184   |
| time/                 |          |
|    fps                | 755      |
|    iterations         | 7400     |
|    time_elapsed       | 48       |
|    total_timesteps    | 37000    |
| train/                |          |
|    entropy_loss       | -0.585   |
|    explained_variance | -0.0241  |
|    learning_rate      | 0.0007   |
|    n_updates          | 7399     |
|    policy_loss        | 0.00149  |
|    value_loss         | 0.000249 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.184   |
| time/                 |          |
|    fps                | 757      |
|    iterations         | 7500     |
|    time_elapsed       | 49       |
|    total_timesteps    | 37500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 0.876    |
| time/                 |          |
|    fps                | 758      |
|    iterations         | 8700     |
|    time_elapsed       | 57       |
|    total_timesteps    | 43500    |
| train/                |          |
|    entropy_loss       | -0.891   |
|    explained_variance | 0.041    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8699     |
|    policy_loss        | 0.00699  |
|    value_loss         | 0.000181 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 0.876    |
| time/                 |          |
|    fps                | 756      |
|    iterations         | 8800     |
|    time_elapsed       | 58       |
|    total_timesteps    | 44000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 0.962    |
| time/                 |          |
|    fps                | 745      |
|    iterations         | 10000    |
|    time_elapsed       | 67       |
|    total_timesteps    | 50000    |
| train/                |          |
|    entropy_loss       | -0.899   |
|    explained_variance | -0.0418  |
|    learning_rate      | 0.0007   |
|    n_updates          | 9999     |
|    policy_loss        | 0.00416  |
|    value_loss         | 0.000147 |
------------------------------------
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------
| time/              |      |
|    fps             | 1539 |

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_mean          | 8.13        |
| time/                   |             |
|    fps                  | 957         |
|    iterations           | 11          |
|    time_elapsed         | 23          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.008051269 |
|    clip_fraction        | 0.094       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.902      |
|    explained_variance   | 0.403       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0125      |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.00689    |
|    value_loss           | 0.00623     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_mean          | 22.2        |
| time/                   |             |
|    fps                  | 922         |
|    iterations           | 21          |
|    time_elapsed         | 46          |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.009975587 |
|    clip_fraction        | 0.103       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.738      |
|    explained_variance   | 0.567       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00118     |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.00897    |
|    value_loss           | 0.00867     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+

------------------------------------
| time/                 |          |
|    fps                | 574      |
|    iterations         | 700      |
|    time_elapsed       | 6        |
|    total_timesteps    | 3500     |
| train/                |          |
|    entropy_loss       | -0.779   |
|    explained_variance | -0.0615  |
|    learning_rate      | 0.0007   |
|    n_updates          | 699      |
|    policy_loss        | 0.0455   |
|    value_loss         | 0.00111  |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 599      |
|    iterations         | 800      |
|    time_elapsed       | 6        |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -1.04    |
|    explained_variance | -64.4    |
|    learning_rate      | 0.0007   |
|    n_updates          | 799      |
|    policy_loss        | -0.0484  |
|    value_loss         | 0.00703  |
-

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 0.993    |
| time/                 |          |
|    fps                | 722      |
|    iterations         | 2100     |
|    time_elapsed       | 14       |
|    total_timesteps    | 10500    |
| train/                |          |
|    entropy_loss       | -1.08    |
|    explained_variance | -25.7    |
|    learning_rate      | 0.0007   |
|    n_updates          | 2099     |
|    policy_loss        | -0.0134  |
|    value_loss         | 0.000866 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 0.993    |
| time/                 |          |
|    fps                | 729      |
|    iterations         | 2200     |
|    time_elapsed       | 15       |
|    total_timesteps    | 11000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.03     |
| time/                 |          |
|    fps                | 710      |
|    iterations         | 3400     |
|    time_elapsed       | 23       |
|    total_timesteps    | 17000    |
| train/                |          |
|    entropy_loss       | -0.897   |
|    explained_variance | -3.69    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3399     |
|    policy_loss        | 0.00247  |
|    value_loss         | 1.62e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.03     |
| time/                 |          |
|    fps                | 715      |
|    iterations         | 3500     |
|    time_elapsed       | 24       |
|    total_timesteps    | 17500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.73     |
| time/                 |          |
|    fps                | 734      |
|    iterations         | 4700     |
|    time_elapsed       | 32       |
|    total_timesteps    | 23500    |
| train/                |          |
|    entropy_loss       | -0.886   |
|    explained_variance | -0.439   |
|    learning_rate      | 0.0007   |
|    n_updates          | 4699     |
|    policy_loss        | 0.00521  |
|    value_loss         | 7.16e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.45     |
| time/                 |          |
|    fps                | 727      |
|    iterations         | 4800     |
|    time_elapsed       | 33       |
|    total_timesteps    | 24000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.54     |
| time/                 |          |
|    fps                | 688      |
|    iterations         | 6000     |
|    time_elapsed       | 43       |
|    total_timesteps    | 30000    |
| train/                |          |
|    entropy_loss       | -0.897   |
|    explained_variance | 0.256    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5999     |
|    policy_loss        | -0.00146 |
|    value_loss         | 5.09e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.54     |
| time/                 |          |
|    fps                | 690      |
|    iterations         | 6100     |
|    time_elapsed       | 44       |
|    total_timesteps    | 30500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.31     |
| time/                 |          |
|    fps                | 702      |
|    iterations         | 7300     |
|    time_elapsed       | 51       |
|    total_timesteps    | 36500    |
| train/                |          |
|    entropy_loss       | -0.618   |
|    explained_variance | -0.283   |
|    learning_rate      | 0.0007   |
|    n_updates          | 7299     |
|    policy_loss        | -0.00679 |
|    value_loss         | 3.96e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.31     |
| time/                 |          |
|    fps                | 703      |
|    iterations         | 7400     |
|    time_elapsed       | 52       |
|    total_timesteps    | 37000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.25     |
| time/                 |          |
|    fps                | 698      |
|    iterations         | 8600     |
|    time_elapsed       | 61       |
|    total_timesteps    | 43000    |
| train/                |          |
|    entropy_loss       | -0.48    |
|    explained_variance | -0.772   |
|    learning_rate      | 0.0007   |
|    n_updates          | 8599     |
|    policy_loss        | 0.000351 |
|    value_loss         | 1.29e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | 1.24      |
| time/                 |           |
|    fps                | 699       |
|    iterations         | 8700      |
|    time_elapsed       | 62        |
|    total_timesteps    | 43500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.45     |
| time/                 |          |
|    fps                | 701      |
|    iterations         | 9900     |
|    time_elapsed       | 70       |
|    total_timesteps    | 49500    |
| train/                |          |
|    entropy_loss       | -0.682   |
|    explained_variance | 0.018    |
|    learning_rate      | 0.0007   |
|    n_updates          | 9899     |
|    policy_loss        | -0.00327 |
|    value_loss         | 0.000668 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.45     |
| time/                 |          |
|    fps                | 700      |
|    iterations         | 10000    |
|    time_elapsed       | 71       |
|    total_timesteps    | 50000    |
| train/                |          |
|

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_mean          | 8.88        |
| time/                   |             |
|    fps                  | 942         |
|    iterations           | 10          |
|    time_elapsed         | 21          |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.008306678 |
|    clip_fraction        | 0.102       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.959      |
|    explained_variance   | 0.252       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0157     |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.0124     |
|    value_loss           | 0.0119      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_mean          | 21.4        |
| time/                   |             |
|    fps                  | 828         |
|    iterations           | 20          |
|    time_elapsed         | 49          |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.009544043 |
|    clip_fraction        | 0.0959      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.758      |
|    explained_variance   | 0.626       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0439      |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.0075     |
|    value_loss           | 0.00866     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+

------------------------------------
| time/                 |          |
|    fps                | 802      |
|    iterations         | 500      |
|    time_elapsed       | 3        |
|    total_timesteps    | 2500     |
| train/                |          |
|    entropy_loss       | -0.892   |
|    explained_variance | -634     |
|    learning_rate      | 0.0007   |
|    n_updates          | 499      |
|    policy_loss        | 0.00763  |
|    value_loss         | 0.00173  |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 800      |
|    iterations         | 600      |
|    time_elapsed       | 3        |
|    total_timesteps    | 3000     |
| train/                |          |
|    entropy_loss       | -1.02    |
|    explained_variance | -0.465   |
|    learning_rate      | 0.0007   |
|    n_updates          | 599      |
|    policy_loss        | 0.0381   |
|    value_loss         | 0.00286  |
-

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 2.71     |
| time/                 |          |
|    fps                | 703      |
|    iterations         | 1900     |
|    time_elapsed       | 13       |
|    total_timesteps    | 9500     |
| train/                |          |
|    entropy_loss       | -0.95    |
|    explained_variance | -33.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1899     |
|    policy_loss        | -0.00173 |
|    value_loss         | 1.87e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.6      |
| time/                 |          |
|    fps                | 707      |
|    iterations         | 2000     |
|    time_elapsed       | 14       |
|    total_timesteps    | 10000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.07     |
| time/                 |          |
|    fps                | 678      |
|    iterations         | 3200     |
|    time_elapsed       | 23       |
|    total_timesteps    | 16000    |
| train/                |          |
|    entropy_loss       | -0.959   |
|    explained_variance | -232     |
|    learning_rate      | 0.0007   |
|    n_updates          | 3199     |
|    policy_loss        | -0.0667  |
|    value_loss         | 0.00647  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.07     |
| time/                 |          |
|    fps                | 680      |
|    iterations         | 3300     |
|    time_elapsed       | 24       |
|    total_timesteps    | 16500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.24     |
| time/                 |          |
|    fps                | 688      |
|    iterations         | 4500     |
|    time_elapsed       | 32       |
|    total_timesteps    | 22500    |
| train/                |          |
|    entropy_loss       | -0.17    |
|    explained_variance | -16.2    |
|    learning_rate      | 0.0007   |
|    n_updates          | 4499     |
|    policy_loss        | 0.000182 |
|    value_loss         | 2.61e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | 1.24      |
| time/                 |           |
|    fps                | 687       |
|    iterations         | 4600      |
|    time_elapsed       | 33        |
|    total_timesteps    | 23000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.81     |
| time/                 |          |
|    fps                | 680      |
|    iterations         | 5800     |
|    time_elapsed       | 42       |
|    total_timesteps    | 29000    |
| train/                |          |
|    entropy_loss       | -0.898   |
|    explained_variance | 0.562    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5799     |
|    policy_loss        | -0.00328 |
|    value_loss         | 1.59e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 1.81     |
| time/                 |          |
|    fps                | 682      |
|    iterations         | 5900     |
|    time_elapsed       | 43       |
|    total_timesteps    | 29500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 2.1      |
| time/                 |          |
|    fps                | 673      |
|    iterations         | 7100     |
|    time_elapsed       | 52       |
|    total_timesteps    | 35500    |
| train/                |          |
|    entropy_loss       | -0.492   |
|    explained_variance | -4.76    |
|    learning_rate      | 0.0007   |
|    n_updates          | 7099     |
|    policy_loss        | 0.000752 |
|    value_loss         | 7.05e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 2.1      |
| time/                 |          |
|    fps                | 674      |
|    iterations         | 7200     |
|    time_elapsed       | 53       |
|    total_timesteps    | 36000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 2.26     |
| time/                 |          |
|    fps                | 669      |
|    iterations         | 8400     |
|    time_elapsed       | 62       |
|    total_timesteps    | 42000    |
| train/                |          |
|    entropy_loss       | -0.344   |
|    explained_variance | 0.229    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8399     |
|    policy_loss        | 0.00198  |
|    value_loss         | 0.000506 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 2.26     |
| time/                 |          |
|    fps                | 670      |
|    iterations         | 8500     |
|    time_elapsed       | 63       |
|    total_timesteps    | 42500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | 2.23      |
| time/                 |           |
|    fps                | 674       |
|    iterations         | 9700      |
|    time_elapsed       | 71        |
|    total_timesteps    | 48500     |
| train/                |           |
|    entropy_loss       | -0.0407   |
|    explained_variance | -682      |
|    learning_rate      | 0.0007    |
|    n_updates          | 9699      |
|    policy_loss        | -2.79e-06 |
|    value_loss         | 1.79e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | 2.23     |
| time/                 |          |
|    fps                | 675      |
|    iterations         | 9800     |
|    time_elapsed       | 72       |
|    total_timesteps    | 49000    |
| train/             

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+03    |
|    ep_rew_mean          | 3.1         |
| time/                   |             |
|    fps                  | 835         |
|    iterations           | 8           |
|    time_elapsed         | 19          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.007915322 |
|    clip_fraction        | 0.0775      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.04       |
|    explained_variance   | -0.12       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0109     |
|    n_updates            | 70          |
|    policy_gradient_loss | -0.0129     |
|    value_loss           | 0.0097      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.78e+

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 4.78e+03     |
|    ep_rew_mean          | 17.1         |
| time/                   |              |
|    fps                  | 778          |
|    iterations           | 18           |
|    time_elapsed         | 47           |
|    total_timesteps      | 36864        |
| train/                  |              |
|    approx_kl            | 0.0076874485 |
|    clip_fraction        | 0.107        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.849       |
|    explained_variance   | 0.547        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00678     |
|    n_updates            | 170          |
|    policy_gradient_loss | -0.007       |
|    value_loss           | 0.00959      |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-------------------------------------
| time/                 |           |
|    fps                | 851       |
|    iterations         | 200       |
|    time_elapsed       | 1         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -0.526    |
|    explained_variance | -1.12e+06 |
|    learning_rate      | 0.0007    |
|    n_updates          | 199       |
|    policy_loss        | 0.166     |
|    value_loss         | 0.0183    |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 848      |
|    iterations         | 300      |
|    time_elapsed       | 1        |
|    total_timesteps    | 1500     |
| train/                |          |
|    entropy_loss       | -0.79    |
|    explained_variance | -46.3    |
|    learning_rate      | 0.0007   |
|    n_updates          | 299      |
|    policy_loss        | -0.00911 |
|    value_loss         

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -3.18    |
| time/                 |          |
|    fps                | 761      |
|    iterations         | 1700     |
|    time_elapsed       | 11       |
|    total_timesteps    | 8500     |
| train/                |          |
|    entropy_loss       | -0.478   |
|    explained_variance | -67.6    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1699     |
|    policy_loss        | 0.000992 |
|    value_loss         | 9.41e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -3.18    |
| time/                 |          |
|    fps                | 732      |
|    iterations         | 1800     |
|    time_elapsed       | 12       |
|    total_timesteps    | 9000     |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -1.3      |
| time/                 |           |
|    fps                | 701       |
|    iterations         | 3000      |
|    time_elapsed       | 21        |
|    total_timesteps    | 15000     |
| train/                |           |
|    entropy_loss       | -0.328    |
|    explained_variance | -21.4     |
|    learning_rate      | 0.0007    |
|    n_updates          | 2999      |
|    policy_loss        | -7.14e-05 |
|    value_loss         | 4.59e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -1.3     |
| time/                 |          |
|    fps                | 705      |
|    iterations         | 3100     |
|    time_elapsed       | 21       |
|    total_timesteps    | 15500    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.294   |
| time/                 |          |
|    fps                | 716      |
|    iterations         | 4300     |
|    time_elapsed       | 29       |
|    total_timesteps    | 21500    |
| train/                |          |
|    entropy_loss       | -0.731   |
|    explained_variance | -16.8    |
|    learning_rate      | 0.0007   |
|    n_updates          | 4299     |
|    policy_loss        | 0.00448  |
|    value_loss         | 0.000164 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.294   |
| time/                 |          |
|    fps                | 717      |
|    iterations         | 4400     |
|    time_elapsed       | 30       |
|    total_timesteps    | 22000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.563   |
| time/                 |          |
|    fps                | 692      |
|    iterations         | 5600     |
|    time_elapsed       | 40       |
|    total_timesteps    | 28000    |
| train/                |          |
|    entropy_loss       | -1.02    |
|    explained_variance | -279     |
|    learning_rate      | 0.0007   |
|    n_updates          | 5599     |
|    policy_loss        | 0.134    |
|    value_loss         | 0.0191   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.563   |
| time/                 |          |
|    fps                | 691      |
|    iterations         | 5700     |
|    time_elapsed       | 41       |
|    total_timesteps    | 28500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -0.542    |
| time/                 |           |
|    fps                | 676       |
|    iterations         | 6900      |
|    time_elapsed       | 51        |
|    total_timesteps    | 34500     |
| train/                |           |
|    entropy_loss       | -0.277    |
|    explained_variance | -19.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 6899      |
|    policy_loss        | -0.000363 |
|    value_loss         | 4.49e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.542   |
| time/                 |          |
|    fps                | 677      |
|    iterations         | 7000     |
|    time_elapsed       | 51       |
|    total_timesteps    | 35000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.53    |
| time/                 |          |
|    fps                | 677      |
|    iterations         | 8200     |
|    time_elapsed       | 60       |
|    total_timesteps    | 41000    |
| train/                |          |
|    entropy_loss       | -0.127   |
|    explained_variance | -1.04    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8199     |
|    policy_loss        | 3.42e-06 |
|    value_loss         | 3.45e-08 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.53    |
| time/                 |          |
|    fps                | 678      |
|    iterations         | 8300     |
|    time_elapsed       | 61       |
|    total_timesteps    | 41500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 4.78e+03 |
|    ep_rew_mean        | -0.55    |
| time/                 |          |
|    fps                | 678      |
|    iterations         | 9500     |
|    time_elapsed       | 70       |
|    total_timesteps    | 47500    |
| train/                |          |
|    entropy_loss       | -0.0502  |
|    explained_variance | -25.5    |
|    learning_rate      | 0.0007   |
|    n_updates          | 9499     |
|    policy_loss        | 2.33e-05 |
|    value_loss         | 1.08e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 4.78e+03  |
|    ep_rew_mean        | -0.511    |
| time/                 |           |
|    fps                | 679       |
|    iterations         | 9600      |
|    time_elapsed       | 70        |
|    total_timesteps    | 48000     |
| train/                |    