In [1]:
#multiagent ppo
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from sklearn.preprocessing import StandardScaler

# Function to load and normalize data
def load_and_normalize_data(train_file, test_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
    df_test['timestamp'] = pd.to_datetime(df_test['timestamp'])

    scaler = StandardScaler()
    columns_to_normalize = ['open', 'high', 'low', 'close', 'volume']

    df_train[columns_to_normalize] = scaler.fit_transform(df_train[columns_to_normalize])
    df_test[columns_to_normalize] = scaler.transform(df_test[columns_to_normalize])

    return df_train, df_test, scaler

# Single-Agent Trading Environment for Training
class SingleAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(SingleAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0 = neutral, 1 = long, -1 = short
        self.entry_price = 0
        self.trades = []
        self.scaler = scaler

        # Action space: hold (0), buy (1), sell (2)
        self.action_space = spaces.Discrete(3)
        # Observation space: Stock prices (open, high, low, close, volume)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(window_size, 5), dtype=np.float32)

    def reset(self, **kwargs):
        self.current_step = 0
        self.position = 0
        self.balance = self.initial_balance
        self.trades = []
        self.entry_price = 0
        return self._get_observation(), {}

    def _get_observation(self):
        return self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)

    def step(self, action):
        reward = 0
        current_price = self.data.iloc[self.current_step]['close']
        if action == 1:  # Buy
            if self.position == 0:
                self.position = 1
                self.entry_price = current_price
            elif self.position == -1:  # Close short
                reward = self.entry_price - current_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0
        elif action == 2:  # Sell
            if self.position == 0:
                self.position = -1
                self.entry_price = current_price
            elif self.position == 1:  # Close long
                reward = current_price - self.entry_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return self._get_observation(), reward, terminated, truncated, {}

# Mixed Multi-Agent Testing Environment
class MixedMultiAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(MixedMultiAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.scaler = scaler

        # Two teams with two agents each
        self.num_teams = 2
        self.agents_per_team = 2
        self.num_agents = self.num_teams * self.agents_per_team

        # Initialize agent balances, positions, and trades
        self.balances = [initial_balance] * self.num_agents
        self.positions = [0] * self.num_agents
        self.entry_prices = [0] * self.num_agents
        self.trades = [[] for _ in range(self.num_agents)]

    def reset(self, **kwargs):
        self.current_step = 0
        self.balances = [self.initial_balance] * self.num_agents
        self.positions = [0] * self.num_agents
        self.entry_prices = [0] * self.num_agents
        self.trades = [[] for _ in range(self.num_agents)]
        return [self._get_observation() for _ in range(self.num_agents)], {}

    def _get_observation(self):
        return self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)

    def step(self, actions):
        rewards = [0] * self.num_agents
        current_price = self.data.iloc[self.current_step]['close']
        for i, action in enumerate(actions):
            if action == 1:  # Buy
                if self.positions[i] == 0:
                    self.positions[i] = 1
                    self.entry_prices[i] = current_price
                elif self.positions[i] == -1:  # Close short
                    reward = self.entry_prices[i] - current_price
                    self.balances[i] += reward
                    rewards[i] = reward
                    self.positions[i] = 0
                    self.trades[i].append(reward)
            elif action == 2:  # Sell
                if self.positions[i] == 0:
                    self.positions[i] = -1
                    self.entry_prices[i] = current_price
                elif self.positions[i] == 1:  # Close long
                    reward = current_price - self.entry_prices[i]
                    self.balances[i] += reward
                    rewards[i] = reward
                    self.positions[i] = 0
                    self.trades[i].append(reward)

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return [self._get_observation() for _ in range(self.num_agents)], rewards, terminated, truncated, {}

# Calculate individual metrics
def calculate_metrics(trades, initial_balance, final_balance):
    total_profit = final_balance - initial_balance
    cumulative_return = total_profit / initial_balance
    
    # Calculate positive and negative trades for profit factor
    positive_trades = [trade for trade in trades if trade > 0]
    negative_trades = [trade for trade in trades if trade < 0]
    profit_factor = sum(positive_trades) / abs(sum(negative_trades)) if negative_trades else float('inf')

    win_rate = len(positive_trades) / len(trades) if trades else 0

    # Calculate Sharpe Ratio
    sharpe_ratio = np.mean(trades) / np.std(trades) if np.std(trades) != 0 else 0

    # Sortino Ratio (uses only negative trades as downside deviation)
    downside_std = np.std([trade for trade in trades if trade < 0])
    sortino_ratio = np.mean(trades) / downside_std if downside_std != 0 else 0

    # Maximum Drawdown
    cumulative_balance = np.cumsum(trades)
    running_max = np.maximum.accumulate(cumulative_balance)
    drawdown = running_max - cumulative_balance
    max_drawdown = np.max(drawdown) if len(drawdown) > 0 else 0

    return {
        "Total Profit": total_profit,
        "Cumulative Return": cumulative_return,
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Maximum Drawdown": max_drawdown
    }

# Sharpe Ratio-weighted aggregation
def aggregate_metrics_sharpe_weighted(metrics_list):
    # Filter out agents with non-positive Sharpe Ratios
    positive_sharpe_metrics = [m for m in metrics_list if m["Sharpe Ratio"] > 0]
    total_sharpe = sum(m["Sharpe Ratio"] for m in positive_sharpe_metrics)
    
    # If no agents have a positive Sharpe Ratio, return zeros for all metrics
    if total_sharpe == 0:
        return {metric: 0 for metric in metrics_list[0]}
    
    combined_metrics = {
        "Total Profit": sum(m["Total Profit"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
        "Cumulative Return": sum(m["Cumulative Return"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
        "Win Rate": sum(m["Win Rate"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
        "Profit Factor": sum(m["Profit Factor"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
        "Sharpe Ratio": sum(m["Sharpe Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
        "Sortino Ratio": sum(m["Sortino Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
        "Maximum Drawdown": sum(m["Maximum Drawdown"] * m["Sharpe Ratio"] / total_sharpe for m in positive_sharpe_metrics),
    }
    return combined_metrics

# Train and evaluate each agent
def train_and_evaluate():
    train_file = 'NFLX_TRAINING.csv'
    test_file = 'NFLX_TESTING.csv'
    df_train_normalized, df_test_normalized, scaler = load_and_normalize_data(train_file, test_file)

    # Track training metrics
    training_metrics = []

    # Train each agent independently in single-agent environments
    models = []
    for i in range(4):  # 4 agents
        env_train = SingleAgentEnv(df_train_normalized, window_size=10, scaler=scaler)
        model = PPO("MlpPolicy", env_train, verbose=1)
        model.learn(total_timesteps=50000)
        models.append(model)

        # Calculate training metrics for each agent
        final_balance = env_train.balance
        metrics = calculate_metrics(env_train.trades, env_train.initial_balance, final_balance)
        training_metrics.append(metrics)
        print(f"\n--- Agent {i+1} Training Metrics ---")
        for metric, value in metrics.items():
            print(f"{metric}: {value}")

    # Aggregate training metrics with Sharpe Ratio weighting
    combined_training_metrics = aggregate_metrics_sharpe_weighted(training_metrics)
    print("\n=== Combined Training Metrics for All Agents (Sharpe Ratio Weighted) ===")
    for metric, value in combined_training_metrics.items():
        print(f"{metric}: {value}")

    # Test in the mixed multi-agent environment
    env_test = MixedMultiAgentEnv(df_test_normalized, window_size=10, scaler=scaler)
    obs, _ = env_test.reset()
    done = False
    while not done:
        actions = [model.predict(obs[i])[0] for i, model in enumerate(models)]
        obs, rewards, done, truncated, _ = env_test.step(actions)

    # Calculate and display testing metrics for each agent in the testing environment
    testing_metrics = []
    for i in range(4):  # 4 agents
        final_balance = env_test.balances[i]
        metrics = calculate_metrics(env_test.trades[i], env_test.initial_balance, final_balance)
        testing_metrics.append(metrics)
        print(f"\n--- Agent {i+1} Testing Metrics ---")
        for metric, value in metrics.items():
            print(f"{metric}: {value}")

    # Aggregate testing metrics with Sharpe Ratio weighting
    combined_testing_metrics = aggregate_metrics_sharpe_weighted(testing_metrics)
    print("\n=== Combined Testing Metrics for All Agents (Sharpe Ratio Weighted) ===")
    for metric, value in combined_testing_metrics.items():
        print(f"{metric}: {value}")

# Run the training and evaluation
train_and_evaluate()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------
| time/              |      |
|    fps             | 1943 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1454         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0054155514 |
|    clip_fraction        | 0.0171       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.1         |
|    explained_variance   | -0.809       |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0308       |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00619     |
|    val

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.14e+03     |
|    ep_rew_mean          | 17           |
| time/                   |              |
|    fps                  | 1078         |
|    iterations           | 12           |
|    time_elapsed         | 22           |
|    total_timesteps      | 24576        |
| train/                  |              |
|    approx_kl            | 0.0071017863 |
|    clip_fraction        | 0.104        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.866       |
|    explained_variance   | 0.61         |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00249     |
|    n_updates            | 110          |
|    policy_gradient_loss | -0.00395     |
|    value_loss           | 0.0107       |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.14e+03     |
|    ep_rew_mean          | 36.4         |
| time/                   |              |
|    fps                  | 1152         |
|    iterations           | 22           |
|    time_elapsed         | 39           |
|    total_timesteps      | 45056        |
| train/                  |              |
|    approx_kl            | 0.0065460764 |
|    clip_fraction        | 0.0835       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.766       |
|    explained_variance   | 0.553        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.0117      |
|    n_updates            | 210          |
|    policy_gradient_loss | -0.00018     |
|    value_loss           | 0.00993      |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.14e+03     |
|    ep_rew_mean          | 7.9          |
| time/                   |              |
|    fps                  | 1316         |
|    iterations           | 7            |
|    time_elapsed         | 10           |
|    total_timesteps      | 14336        |
| train/                  |              |
|    approx_kl            | 0.0070443703 |
|    clip_fraction        | 0.0618       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.05        |
|    explained_variance   | 0.125        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.0372      |
|    n_updates            | 60           |
|    policy_gradient_loss | -0.0141      |
|    value_loss           | 0.00863      |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_mean          | 24.8        |
| time/                   |             |
|    fps                  | 1279        |
|    iterations           | 17          |
|    time_elapsed         | 27          |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.010200293 |
|    clip_fraction        | 0.0997      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.822      |
|    explained_variance   | 0.577       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.014      |
|    n_updates            | 160         |
|    policy_gradient_loss | -0.0117     |
|    value_loss           | 0.0125      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+

-----------------------------
| time/              |      |
|    fps             | 1858 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1518         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0073795198 |
|    clip_fraction        | 0.0356       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.09        |
|    explained_variance   | -1.66        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.0284      |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.0059      |
|    value_loss           | 0.0213       |
------------------------------------------
----------------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_mean          | 17.4        |
| time/                   |             |
|    fps                  | 1301        |
|    iterations           | 12          |
|    time_elapsed         | 18          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.012739701 |
|    clip_fraction        | 0.129       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.92       |
|    explained_variance   | 0.249       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0142      |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.00668    |
|    value_loss           | 0.00841     |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 7.14e+03

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 7.14e+03   |
|    ep_rew_mean          | 36.3       |
| time/                   |            |
|    fps                  | 1287       |
|    iterations           | 22         |
|    time_elapsed         | 35         |
|    total_timesteps      | 45056      |
| train/                  |            |
|    approx_kl            | 0.02205822 |
|    clip_fraction        | 0.135      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.74      |
|    explained_variance   | 0.504      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00194   |
|    n_updates            | 210        |
|    policy_gradient_loss | -0.00396   |
|    value_loss           | 0.00928    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_mean          | 5.45        |
| time/                   |             |
|    fps                  | 1334        |
|    iterations           | 7           |
|    time_elapsed         | 10          |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.009271107 |
|    clip_fraction        | 0.0864      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.04       |
|    explained_variance   | 0.497       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0281     |
|    n_updates            | 60          |
|    policy_gradient_loss | -0.0139     |
|    value_loss           | 0.00738     |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.14

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_mean          | 21.2        |
| time/                   |             |
|    fps                  | 1293        |
|    iterations           | 17          |
|    time_elapsed         | 26          |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.008322539 |
|    clip_fraction        | 0.0816      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.846      |
|    explained_variance   | 0.542       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00425     |
|    n_updates            | 160         |
|    policy_gradient_loss | -0.0118     |
|    value_loss           | 0.0127      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+


--- Agent 1 Testing Metrics ---
Total Profit: 22.91199580067223
Cumulative Return: 0.002291199580067223
Win Rate: 0.6370370370370371
Profit Factor: 6.893924688963166
Sharpe Ratio: 0.4060544843040344
Sortino Ratio: 1.8384695436792704
Maximum Drawdown: 0.22076525711250916

--- Agent 2 Testing Metrics ---
Total Profit: 27.936598937369126
Cumulative Return: 0.0027936598937369126
Win Rate: 0.6888020833333334
Profit Factor: 8.733727375653618
Sharpe Ratio: 0.40873106969729067
Sortino Ratio: 1.9693238192647828
Maximum Drawdown: 0.1946820492268473

--- Agent 3 Testing Metrics ---
Total Profit: 22.19055735726215
Cumulative Return: 0.002219055735726215
Win Rate: 0.6786324786324787
Profit Factor: 5.681812010040585
Sharpe Ratio: 0.38192923840164184
Sortino Ratio: 1.3168392475159405
Maximum Drawdown: 0.5929339364716668

--- Agent 4 Testing Metrics ---
Total Profit: 23.137067388814103
Cumulative Return: 0.00231370673888141
Win Rate: 0.7280701754385965
Profit Factor: 7.362747278196361
Sharpe Ratio: 0

In [2]:
#multiagent DQN
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import DQN
from sklearn.preprocessing import StandardScaler

# Function to load and normalize data
def load_and_normalize_data(train_file, test_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
    df_test['timestamp'] = pd.to_datetime(df_test['timestamp'])

    scaler = StandardScaler()
    columns_to_normalize = ['open', 'high', 'low', 'close', 'volume']

    df_train[columns_to_normalize] = scaler.fit_transform(df_train[columns_to_normalize])
    df_test[columns_to_normalize] = scaler.transform(df_test[columns_to_normalize])

    return df_train, df_test, scaler

# Single-Agent Trading Environment
class SingleAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(SingleAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0 = neutral, 1 = long, -1 = short
        self.trades = []
        self.entry_price = 0
        self.scaler = scaler

        # Action space: hold (0), buy (1), sell (2)
        self.action_space = spaces.Discrete(3)

        # Observation space: Stock prices (open, high, low, close, volume)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(window_size, 5), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 0
        self.position = 0
        self.balance = self.initial_balance
        self.trades = []
        self.entry_price = 0
        return self._get_observation(), {}

    def _get_observation(self):
        return self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)

    def step(self, action):
        reward = 0
        current_price = self.data.iloc[self.current_step]['close']

        if action == 1:  # Buy
            if self.position == 0:
                self.position = 1
                self.entry_price = current_price
            elif self.position == -1:
                reward = self.entry_price - current_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        elif action == 2:  # Sell
            if self.position == 0:
                self.position = -1
                self.entry_price = current_price
            elif self.position == 1:
                reward = current_price - self.entry_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return self._get_observation(), reward, terminated, truncated, {}

# Multi-Agent Trading Environment
class MultiAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None, num_agents=4):
        super(MultiAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.initial_balance = initial_balance
        self.scaler = scaler
        self.num_agents = num_agents
        self.agents = [SingleAgentEnv(data, window_size, initial_balance, scaler) for _ in range(num_agents)]

    def reset(self):
        obs = []
        for agent in self.agents:
            agent_obs, _ = agent.reset()
            obs.append(agent_obs)
        return obs

    def step(self, actions):
        obs, rewards, terminated, truncated, infos = [], [], [], [], []
        for agent, action in zip(self.agents, actions):
            agent_obs, reward, done, truncate, info = agent.step(action)
            obs.append(agent_obs)
            rewards.append(reward)
            terminated.append(done)
            truncated.append(truncate)
            infos.append(info)
        return obs, rewards, any(terminated), any(truncated), infos

# Function to calculate metrics for each agent
def calculate_metrics(trades, initial_balance, final_balance):
    total_profit = final_balance - initial_balance
    cumulative_return = (final_balance - initial_balance) / initial_balance
    win_rate = len([trade for trade in trades if trade > 0]) / len(trades) if trades else 0
    gross_profit = sum(trade for trade in trades if trade > 0)
    gross_loss = -sum(trade for trade in trades if trade < 0)
    profit_factor = gross_profit / gross_loss if gross_loss != 0 else np.inf
    returns = np.array(trades)
    sharpe_ratio = np.mean(returns) / np.std(returns) if np.std(returns) != 0 else 0
    downside_std = np.std([min(0, r) for r in returns])
    sortino_ratio = np.mean(returns) / downside_std if downside_std != 0 else 0
    max_drawdown = np.max(np.maximum.accumulate(np.cumsum(trades)) - np.cumsum(trades)) if trades else 0

    metrics = {
        "Total Profit": total_profit,
        "Cumulative Return": cumulative_return,
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Maximum Drawdown": max_drawdown
    }
    return metrics

# Sharpe Ratio-weighted aggregation for combined metrics
def aggregate_metrics_sharpe_weighted(metrics_list):
    positive_metrics = [m for m in metrics_list if m["Sharpe Ratio"] > 0]
    total_sharpe = sum(m["Sharpe Ratio"] for m in positive_metrics)
    if total_sharpe == 0:
        return {metric: 0 for metric in metrics_list[0]}
    
    combined_metrics = {
        "Total Profit": sum(m["Total Profit"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Cumulative Return": sum(m["Cumulative Return"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Win Rate": sum(m["Win Rate"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Profit Factor": sum(m["Profit Factor"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Sharpe Ratio": sum(m["Sharpe Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Sortino Ratio": sum(m["Sortino Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Maximum Drawdown": sum(m["Maximum Drawdown"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
    }
    return combined_metrics

# Train and evaluate each agent
def train_and_evaluate():
    train_file = 'NFLX_TRAINING.csv'
    test_file = 'NFLX_TESTING.csv'
    df_train, df_test, scaler = load_and_normalize_data(train_file, test_file)

    training_metrics = []
    models = []

    # Train each agent independently
    for i in range(4):  # 4 agents
        env_train = SingleAgentEnv(df_train, window_size=10, scaler=scaler)
        model = DQN("MlpPolicy", env_train, verbose=1)
        model.learn(total_timesteps=50000)
        models.append(model)

        # Record training metrics
        training_metrics.append(calculate_metrics(env_train.trades, env_train.initial_balance, env_train.balance))

    combined_training_metrics = aggregate_metrics_sharpe_weighted(training_metrics)
    print("\n=== Combined Training Metrics (Sharpe Ratio Weighted) ===")
    for metric, value in combined_training_metrics.items():
        print(f"{metric}: {value}")

    # Test in the multi-agent environment
    env_test = MultiAgentEnv(df_test, window_size=10, scaler=scaler, num_agents=4)
    obs = env_test.reset()
    done = False
    while not done:
        actions = [model.predict(obs[i])[0] for i, model in enumerate(models)]
        obs, rewards, done, truncated, _ = env_test.step(actions)

    # Calculate and display testing metrics
    testing_metrics = []
    for agent in env_test.agents:
        testing_metrics.append(calculate_metrics(agent.trades, agent.initial_balance, agent.balance))

    combined_testing_metrics = aggregate_metrics_sharpe_weighted(testing_metrics)
    print("\n=== Combined Testing Metrics (Sharpe Ratio Weighted) ===")
    for metric, value in combined_testing_metrics.items():
        print(f"{metric}: {value}")

# Run the training and evaluation
train_and_evaluate()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 7.14e+03 |
|    ep_rew_mean      | 0.166    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1349     |
|    time_elapsed     | 21       |
|    total_timesteps  | 28576    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000137 |
|    n_updates        | 7118     |
----------------------------------
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 7.14e+03 |
|    ep_rew_mean      | 7.96     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1339     |
|    ti

In [3]:
#multiagent a2c
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import A2C
from sklearn.preprocessing import StandardScaler

# Function to load and normalize data
def load_and_normalize_data(train_file, test_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
    df_test['timestamp'] = pd.to_datetime(df_test['timestamp'])

    scaler = StandardScaler()
    columns_to_normalize = ['open', 'high', 'low', 'close', 'volume']

    df_train[columns_to_normalize] = scaler.fit_transform(df_train[columns_to_normalize])
    df_test[columns_to_normalize] = scaler.transform(df_test[columns_to_normalize])

    return df_train, df_test, scaler

# Single-Agent Trading Environment with Modified Reward Structure
class SingleAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(SingleAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0 = neutral, 1 = long, -1 = short
        self.trades = []
        self.entry_price = 0
        self.scaler = scaler

        # Action space: hold (0), buy (1), sell (2)
        self.action_space = spaces.Discrete(3)

        # Observation space: Normalized stock prices (open, high, low, close, volume)
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(window_size, 5), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 0
        self.position = 0
        self.balance = self.initial_balance
        self.trades = []
        self.entry_price = 0
        return self._get_observation(), {}

    def _get_observation(self):
        obs = self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)
        obs -= np.min(obs, axis=0)
        obs /= np.max(obs, axis=0) + 1e-8  # Normalizing to [0,1]
        return obs

    def step(self, action):
        reward = 0
        current_price = self.data.iloc[self.current_step]['close']

        if action == 1:  # Buy
            if self.position == 0:
                self.position = 1
                self.entry_price = current_price
            elif self.position == -1:
                reward = self.entry_price - current_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        elif action == 2:  # Sell
            if self.position == 0:
                self.position = -1
                self.entry_price = current_price
            elif self.position == 1:
                reward = current_price - self.entry_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        # Normalize reward and add transaction cost penalty
        reward = (reward / self.initial_balance) - 0.001  # Small penalty for holding a position

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return self._get_observation(), np.clip(reward, -1, 1), terminated, truncated, {}

# Multi-Agent Trading Environment
class MultiAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None, num_agents=4):
        super(MultiAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.initial_balance = initial_balance
        self.scaler = scaler
        self.num_agents = num_agents
        self.agents = [SingleAgentEnv(data, window_size, initial_balance, scaler) for _ in range(num_agents)]

    def reset(self):
        obs = []
        for agent in self.agents:
            agent_obs, _ = agent.reset()
            obs.append(agent_obs)
        return obs

    def step(self, actions):
        obs, rewards, terminated, truncated, infos = [], [], [], [], []
        for agent, action in zip(self.agents, actions):
            agent_obs, reward, done, truncate, info = agent.step(action)
            obs.append(agent_obs)
            rewards.append(reward)
            terminated.append(done)
            truncated.append(truncate)
            infos.append(info)
        return obs, rewards, any(terminated), any(truncated), infos

# Function to calculate metrics for each agent
def calculate_metrics(trades, initial_balance, final_balance):
    total_profit = final_balance - initial_balance
    cumulative_return = (final_balance - initial_balance) / initial_balance
    win_rate = len([trade for trade in trades if trade > 0]) / len(trades) if trades else 0
    gross_profit = sum(trade for trade in trades if trade > 0)
    gross_loss = -sum(trade for trade in trades if trade < 0)
    profit_factor = gross_profit / gross_loss if gross_loss != 0 else np.inf
    returns = np.array(trades)
    sharpe_ratio = np.mean(returns) / np.std(returns) if np.std(returns) != 0 else 0
    downside_std = np.std([min(0, r) for r in returns])
    sortino_ratio = np.mean(returns) / downside_std if downside_std != 0 else 0
    max_drawdown = np.max(np.maximum.accumulate(np.cumsum(trades)) - np.cumsum(trades)) if trades else 0

    metrics = {
        "Total Profit": total_profit,
        "Cumulative Return": cumulative_return,
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Maximum Drawdown": max_drawdown
    }
    return metrics

# Sharpe Ratio-weighted aggregation for combined metrics
def aggregate_metrics_sharpe_weighted(metrics_list):
    positive_metrics = [m for m in metrics_list if m["Sharpe Ratio"] > 0]
    total_sharpe = sum(m["Sharpe Ratio"] for m in positive_metrics)
    if total_sharpe == 0:
        return {metric: 0 for metric in metrics_list[0]}
    
    combined_metrics = {
        "Total Profit": sum(m["Total Profit"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Cumulative Return": sum(m["Cumulative Return"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Win Rate": sum(m["Win Rate"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Profit Factor": sum(m["Profit Factor"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Sharpe Ratio": sum(m["Sharpe Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Sortino Ratio": sum(m["Sortino Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Maximum Drawdown": sum(m["Maximum Drawdown"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
    }
    return combined_metrics

# Train and evaluate each agent
def train_and_evaluate():
    train_file = 'NFLX_TRAINING.csv'
    test_file = 'NFLX_TESTING.csv'
    df_train, df_test, scaler = load_and_normalize_data(train_file, test_file)

    training_metrics = []
    models = []

    # Train each agent independently
    for i in range(4):  # 4 agents
        env_train = SingleAgentEnv(df_train, window_size=10, scaler=scaler)
        model = A2C("MlpPolicy", env_train, verbose=1)
        model.learn(total_timesteps=100000)  # Increased timesteps
        models.append(model)

        # Record training metrics
        training_metrics.append(calculate_metrics(env_train.trades, env_train.initial_balance, env_train.balance))

    combined_training_metrics = aggregate_metrics_sharpe_weighted(training_metrics)
    print("\n=== Combined Training Metrics (Sharpe Ratio Weighted) ===")
    for metric, value in combined_training_metrics.items():
        print(f"{metric}: {value}")

    # Test in the multi-agent environment
    env_test = MultiAgentEnv(df_test, window_size=10, scaler=scaler, num_agents=4)
    obs = env_test.reset()
    done = False
    while not done:
        actions = [model.predict(obs[i])[0] for i, model in enumerate(models)]
        obs, rewards, done, truncated, _ = env_test.step(actions)

    # Calculate and display testing metrics
    testing_metrics = []
    for agent in env_test.agents:
        testing_metrics.append(calculate_metrics(agent.trades, agent.initial_balance, agent.balance))

    combined_testing_metrics = aggregate_metrics_sharpe_weighted(testing_metrics)
    print("\n=== Combined Testing Metrics (Sharpe Ratio Weighted) ===")
    for metric, value in combined_testing_metrics.items():
        print(f"{metric}: {value}")

# Run the training and evaluation
train_and_evaluate()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-------------------------------------
| time/                 |           |
|    fps                | 1036      |
|    iterations         | 100       |
|    time_elapsed       | 0         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -1.04     |
|    explained_variance | -2.88e+04 |
|    learning_rate      | 0.0007    |
|    n_updates          | 99        |
|    policy_loss        | 0.0292    |
|    value_loss         | 0.00695   |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 954      |
|    iterations         | 200      |
|    time_elapsed       | 1        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -1.03    |
|    explained_variance | -80.7    |
|    learning_rate      | 0.0007   |
|   

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 938       |
|    iterations         | 1700      |
|    time_elapsed       | 9         |
|    total_timesteps    | 8500      |
| train/                |           |
|    entropy_loss       | -1.05     |
|    explained_variance | -1.06e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 1699      |
|    policy_loss        | -0.0587   |
|    value_loss         | 0.00247   |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 937      |
|    iterations         | 1800     |
|    time_elapsed       | 9        |
|    total_timesteps    | 9000     |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 918      |
|    iterations         | 3000     |
|    time_elapsed       | 16       |
|    total_timesteps    | 15000    |
| train/                |          |
|    entropy_loss       | -0.964   |
|    explained_variance | -21.7    |
|    learning_rate      | 0.0007   |
|    n_updates          | 2999     |
|    policy_loss        | -0.00158 |
|    value_loss         | 6.85e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 919      |
|    iterations         | 3100     |
|    time_elapsed       | 16       |
|    total_timesteps    | 15500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 924      |
|    iterations         | 4300     |
|    time_elapsed       | 23       |
|    total_timesteps    | 21500    |
| train/                |          |
|    entropy_loss       | -1.04    |
|    explained_variance | -29.2    |
|    learning_rate      | 0.0007   |
|    n_updates          | 4299     |
|    policy_loss        | 0.000991 |
|    value_loss         | 1.93e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 924      |
|    iterations         | 4400     |
|    time_elapsed       | 23       |
|    total_timesteps    | 22000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 925      |
|    iterations         | 5600     |
|    time_elapsed       | 30       |
|    total_timesteps    | 28000    |
| train/                |          |
|    entropy_loss       | -0.927   |
|    explained_variance | -9.54    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5599     |
|    policy_loss        | -0.00222 |
|    value_loss         | 9.45e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 925       |
|    iterations         | 5700      |
|    time_elapsed       | 30        |
|    total_timesteps    | 28500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 926      |
|    iterations         | 6900     |
|    time_elapsed       | 37       |
|    total_timesteps    | 34500    |
| train/                |          |
|    entropy_loss       | -0.904   |
|    explained_variance | -2.6e+03 |
|    learning_rate      | 0.0007   |
|    n_updates          | 6899     |
|    policy_loss        | 0.000916 |
|    value_loss         | 7.58e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 926       |
|    iterations         | 7000      |
|    time_elapsed       | 37        |
|    total_timesteps    | 35000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 926      |
|    iterations         | 8200     |
|    time_elapsed       | 44       |
|    total_timesteps    | 41000    |
| train/                |          |
|    entropy_loss       | -0.986   |
|    explained_variance | -1.8     |
|    learning_rate      | 0.0007   |
|    n_updates          | 8199     |
|    policy_loss        | 0.000522 |
|    value_loss         | 2.76e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 926      |
|    iterations         | 8300     |
|    time_elapsed       | 44       |
|    total_timesteps    | 41500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 927      |
|    iterations         | 9500     |
|    time_elapsed       | 51       |
|    total_timesteps    | 47500    |
| train/                |          |
|    entropy_loss       | -0.852   |
|    explained_variance | -22.8    |
|    learning_rate      | 0.0007   |
|    n_updates          | 9499     |
|    policy_loss        | 0.000127 |
|    value_loss         | 5.2e-07  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 927      |
|    iterations         | 9600     |
|    time_elapsed       | 51       |
|    total_timesteps    | 48000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 928       |
|    iterations         | 10800     |
|    time_elapsed       | 58        |
|    total_timesteps    | 54000     |
| train/                |           |
|    entropy_loss       | -0.8      |
|    explained_variance | -3.99     |
|    learning_rate      | 0.0007    |
|    n_updates          | 10799     |
|    policy_loss        | -0.000493 |
|    value_loss         | 4.92e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 928      |
|    iterations         | 10900    |
|    time_elapsed       | 58       |
|    total_timesteps    | 54500    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 924      |
|    iterations         | 12100    |
|    time_elapsed       | 65       |
|    total_timesteps    | 60500    |
| train/                |          |
|    entropy_loss       | -0.81    |
|    explained_variance | -48.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 12099    |
|    policy_loss        | -0.00221 |
|    value_loss         | 9.4e-06  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 918      |
|    iterations         | 12200    |
|    time_elapsed       | 66       |
|    total_timesteps    | 61000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 884       |
|    iterations         | 13400     |
|    time_elapsed       | 75        |
|    total_timesteps    | 67000     |
| train/                |           |
|    entropy_loss       | -0.249    |
|    explained_variance | -9.45     |
|    learning_rate      | 0.0007    |
|    n_updates          | 13399     |
|    policy_loss        | -2.78e-05 |
|    value_loss         | 1.77e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 885       |
|    iterations         | 13500     |
|    time_elapsed       | 76        |
|    total_timesteps    | 67500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 870       |
|    iterations         | 14700     |
|    time_elapsed       | 84        |
|    total_timesteps    | 73500     |
| train/                |           |
|    entropy_loss       | -0.531    |
|    explained_variance | -1.21     |
|    learning_rate      | 0.0007    |
|    n_updates          | 14699     |
|    policy_loss        | -0.000524 |
|    value_loss         | 6.05e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 870       |
|    iterations         | 14800     |
|    time_elapsed       | 84        |
|    total_timesteps    | 74000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 871       |
|    iterations         | 16000     |
|    time_elapsed       | 91        |
|    total_timesteps    | 80000     |
| train/                |           |
|    entropy_loss       | -0.647    |
|    explained_variance | -671      |
|    learning_rate      | 0.0007    |
|    n_updates          | 15999     |
|    policy_loss        | -0.000236 |
|    value_loss         | 1.99e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 871      |
|    iterations         | 16100    |
|    time_elapsed       | 92       |
|    total_timesteps    | 80500    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 875      |
|    iterations         | 17300    |
|    time_elapsed       | 98       |
|    total_timesteps    | 86500    |
| train/                |          |
|    entropy_loss       | -0.262   |
|    explained_variance | -125     |
|    learning_rate      | 0.0007   |
|    n_updates          | 17299    |
|    policy_loss        | 0.000144 |
|    value_loss         | 4.48e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 876      |
|    iterations         | 17400    |
|    time_elapsed       | 99       |
|    total_timesteps    | 87000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 869      |
|    iterations         | 18600    |
|    time_elapsed       | 107      |
|    total_timesteps    | 93000    |
| train/                |          |
|    entropy_loss       | -0.613   |
|    explained_variance | 0.41     |
|    learning_rate      | 0.0007   |
|    n_updates          | 18599    |
|    policy_loss        | 0.00653  |
|    value_loss         | 4.01e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 865      |
|    iterations         | 18700    |
|    time_elapsed       | 107      |
|    total_timesteps    | 93500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 866       |
|    iterations         | 19900     |
|    time_elapsed       | 114       |
|    total_timesteps    | 99500     |
| train/                |           |
|    entropy_loss       | -0.401    |
|    explained_variance | 0.492     |
|    learning_rate      | 0.0007    |
|    n_updates          | 19899     |
|    policy_loss        | -0.000102 |
|    value_loss         | 5.28e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 866       |
|    iterations         | 20000     |
|    time_elapsed       | 115       |
|    total_timesteps    | 100000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.15    |
| time/                 |          |
|    fps                | 869      |
|    iterations         | 1500     |
|    time_elapsed       | 8        |
|    total_timesteps    | 7500     |
| train/                |          |
|    entropy_loss       | -0.72    |
|    explained_variance | -93.5    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1499     |
|    policy_loss        | -0.0096  |
|    value_loss         | 0.000344 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.15    |
| time/                 |          |
|    fps                | 869      |
|    iterations         | 1600     |
|    time_elapsed       | 9        |
|    total_timesteps    | 8000     |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.15    |
| time/                 |          |
|    fps                | 843      |
|    iterations         | 2800     |
|    time_elapsed       | 16       |
|    total_timesteps    | 14000    |
| train/                |          |
|    entropy_loss       | -0.675   |
|    explained_variance | -381     |
|    learning_rate      | 0.0007   |
|    n_updates          | 2799     |
|    policy_loss        | -0.00222 |
|    value_loss         | 6.18e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 843      |
|    iterations         | 2900     |
|    time_elapsed       | 17       |
|    total_timesteps    | 14500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 864      |
|    iterations         | 4100     |
|    time_elapsed       | 23       |
|    total_timesteps    | 20500    |
| train/                |          |
|    entropy_loss       | -0.618   |
|    explained_variance | -18.7    |
|    learning_rate      | 0.0007   |
|    n_updates          | 4099     |
|    policy_loss        | 0.00172  |
|    value_loss         | 5.81e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 866      |
|    iterations         | 4200     |
|    time_elapsed       | 24       |
|    total_timesteps    | 21000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 858      |
|    iterations         | 5400     |
|    time_elapsed       | 31       |
|    total_timesteps    | 27000    |
| train/                |          |
|    entropy_loss       | -0.717   |
|    explained_variance | -20.8    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5399     |
|    policy_loss        | -0.0016  |
|    value_loss         | 1.52e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 857      |
|    iterations         | 5500     |
|    time_elapsed       | 32       |
|    total_timesteps    | 27500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 819       |
|    iterations         | 6700      |
|    time_elapsed       | 40        |
|    total_timesteps    | 33500     |
| train/                |           |
|    entropy_loss       | -0.586    |
|    explained_variance | -134      |
|    learning_rate      | 0.0007    |
|    n_updates          | 6699      |
|    policy_loss        | -0.000637 |
|    value_loss         | 1.47e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 814       |
|    iterations         | 6800      |
|    time_elapsed       | 41        |
|    total_timesteps    | 34000     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 801      |
|    iterations         | 8000     |
|    time_elapsed       | 49       |
|    total_timesteps    | 40000    |
| train/                |          |
|    entropy_loss       | -0.66    |
|    explained_variance | -12.8    |
|    learning_rate      | 0.0007   |
|    n_updates          | 7999     |
|    policy_loss        | 7.26e-05 |
|    value_loss         | 5.56e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 801      |
|    iterations         | 8100     |
|    time_elapsed       | 50       |
|    total_timesteps    | 40500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 803       |
|    iterations         | 9300      |
|    time_elapsed       | 57        |
|    total_timesteps    | 46500     |
| train/                |           |
|    entropy_loss       | -0.1      |
|    explained_variance | -20.1     |
|    learning_rate      | 0.0007    |
|    n_updates          | 9299      |
|    policy_loss        | -6.14e-06 |
|    value_loss         | 3.57e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 803       |
|    iterations         | 9400      |
|    time_elapsed       | 58        |
|    total_timesteps    | 47000     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 805      |
|    iterations         | 10600    |
|    time_elapsed       | 65       |
|    total_timesteps    | 53000    |
| train/                |          |
|    entropy_loss       | -0.0507  |
|    explained_variance | -19.2    |
|    learning_rate      | 0.0007   |
|    n_updates          | 10599    |
|    policy_loss        | 1.59e-05 |
|    value_loss         | 4.82e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 805       |
|    iterations         | 10700     |
|    time_elapsed       | 66        |
|    total_timesteps    | 53500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 807      |
|    iterations         | 11900    |
|    time_elapsed       | 73       |
|    total_timesteps    | 59500    |
| train/                |          |
|    entropy_loss       | -0.0506  |
|    explained_variance | -193     |
|    learning_rate      | 0.0007   |
|    n_updates          | 11899    |
|    policy_loss        | 2.91e-06 |
|    value_loss         | 1.24e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 807      |
|    iterations         | 12000    |
|    time_elapsed       | 74       |
|    total_timesteps    | 60000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 804      |
|    iterations         | 13200    |
|    time_elapsed       | 82       |
|    total_timesteps    | 66000    |
| train/                |          |
|    entropy_loss       | -0.0985  |
|    explained_variance | -7.9     |
|    learning_rate      | 0.0007   |
|    n_updates          | 13199    |
|    policy_loss        | 3.02e-05 |
|    value_loss         | 2.51e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 802       |
|    iterations         | 13300     |
|    time_elapsed       | 82        |
|    total_timesteps    | 66500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 794      |
|    iterations         | 14500    |
|    time_elapsed       | 91       |
|    total_timesteps    | 72500    |
| train/                |          |
|    entropy_loss       | -0.0308  |
|    explained_variance | -89.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 14499    |
|    policy_loss        | 1.45e-05 |
|    value_loss         | 1.15e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 795      |
|    iterations         | 14600    |
|    time_elapsed       | 91       |
|    total_timesteps    | 73000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 794       |
|    iterations         | 15800     |
|    time_elapsed       | 99        |
|    total_timesteps    | 79000     |
| train/                |           |
|    entropy_loss       | -0.0167   |
|    explained_variance | -11.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 15799     |
|    policy_loss        | -9.64e-06 |
|    value_loss         | 1.83e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 794      |
|    iterations         | 15900    |
|    time_elapsed       | 100      |
|    total_timesteps    | 79500    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 795      |
|    iterations         | 17100    |
|    time_elapsed       | 107      |
|    total_timesteps    | 85500    |
| train/                |          |
|    entropy_loss       | -0.0111  |
|    explained_variance | -56.1    |
|    learning_rate      | 0.0007   |
|    n_updates          | 17099    |
|    policy_loss        | 1.56e-07 |
|    value_loss         | 2.79e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 795      |
|    iterations         | 17200    |
|    time_elapsed       | 108      |
|    total_timesteps    | 86000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 790       |
|    iterations         | 18400     |
|    time_elapsed       | 116       |
|    total_timesteps    | 92000     |
| train/                |           |
|    entropy_loss       | -0.00566  |
|    explained_variance | -8.83     |
|    learning_rate      | 0.0007    |
|    n_updates          | 18399     |
|    policy_loss        | -1.03e-06 |
|    value_loss         | 4.02e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 790      |
|    iterations         | 18500    |
|    time_elapsed       | 117      |
|    total_timesteps    | 92500    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 781      |
|    iterations         | 19700    |
|    time_elapsed       | 126      |
|    total_timesteps    | 98500    |
| train/                |          |
|    entropy_loss       | -0.00567 |
|    explained_variance | -8.34    |
|    learning_rate      | 0.0007   |
|    n_updates          | 19699    |
|    policy_loss        | 1.69e-07 |
|    value_loss         | 9.69e-08 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 780      |
|    iterations         | 19800    |
|    time_elapsed       | 126      |
|    total_timesteps    | 99000    |
| train/                |          |
|

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


-------------------------------------
| time/                 |           |
|    fps                | 796       |
|    iterations         | 100       |
|    time_elapsed       | 0         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -1.01     |
|    explained_variance | -1.87e+04 |
|    learning_rate      | 0.0007    |
|    n_updates          | 99        |
|    policy_loss        | -0.057    |
|    value_loss         | 0.0117    |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 724      |
|    iterations         | 200      |
|    time_elapsed       | 1        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.955   |
|    explained_variance | -295     |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | 0.026    |
|    value_loss         

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 664      |
|    iterations         | 1700     |
|    time_elapsed       | 12       |
|    total_timesteps    | 8500     |
| train/                |          |
|    entropy_loss       | -0.128   |
|    explained_variance | -36.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1699     |
|    policy_loss        | 0.00109  |
|    value_loss         | 0.00198  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 669      |
|    iterations         | 1800     |
|    time_elapsed       | 13       |
|    total_timesteps    | 9000     |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 629      |
|    iterations         | 3000     |
|    time_elapsed       | 23       |
|    total_timesteps    | 15000    |
| train/                |          |
|    entropy_loss       | -0.055   |
|    explained_variance | -250     |
|    learning_rate      | 0.0007   |
|    n_updates          | 2999     |
|    policy_loss        | 6.1e-05  |
|    value_loss         | 6.35e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 629      |
|    iterations         | 3100     |
|    time_elapsed       | 24       |
|    total_timesteps    | 15500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 623      |
|    iterations         | 4300     |
|    time_elapsed       | 34       |
|    total_timesteps    | 21500    |
| train/                |          |
|    entropy_loss       | -0.427   |
|    explained_variance | -161     |
|    learning_rate      | 0.0007   |
|    n_updates          | 4299     |
|    policy_loss        | 0.0161   |
|    value_loss         | 0.000764 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 621      |
|    iterations         | 4400     |
|    time_elapsed       | 35       |
|    total_timesteps    | 22000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 625      |
|    iterations         | 5600     |
|    time_elapsed       | 44       |
|    total_timesteps    | 28000    |
| train/                |          |
|    entropy_loss       | -0.316   |
|    explained_variance | -313     |
|    learning_rate      | 0.0007   |
|    n_updates          | 5599     |
|    policy_loss        | 0.00187  |
|    value_loss         | 7.08e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 628       |
|    iterations         | 5700      |
|    time_elapsed       | 45        |
|    total_timesteps    | 28500     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 648       |
|    iterations         | 6900      |
|    time_elapsed       | 53        |
|    total_timesteps    | 34500     |
| train/                |           |
|    entropy_loss       | -0.337    |
|    explained_variance | 0.273     |
|    learning_rate      | 0.0007    |
|    n_updates          | 6899      |
|    policy_loss        | -0.000227 |
|    value_loss         | 5.33e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 649      |
|    iterations         | 7000     |
|    time_elapsed       | 53       |
|    total_timesteps    | 35000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 646      |
|    iterations         | 8200     |
|    time_elapsed       | 63       |
|    total_timesteps    | 41000    |
| train/                |          |
|    entropy_loss       | -0.55    |
|    explained_variance | -24.7    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8199     |
|    policy_loss        | -0.00161 |
|    value_loss         | 9.78e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 647      |
|    iterations         | 8300     |
|    time_elapsed       | 64       |
|    total_timesteps    | 41500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 665       |
|    iterations         | 9500      |
|    time_elapsed       | 71        |
|    total_timesteps    | 47500     |
| train/                |           |
|    entropy_loss       | -0.59     |
|    explained_variance | -36.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 9499      |
|    policy_loss        | -3.92e-05 |
|    value_loss         | 1.01e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 664      |
|    iterations         | 9600     |
|    time_elapsed       | 72       |
|    total_timesteps    | 48000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 666      |
|    iterations         | 10800    |
|    time_elapsed       | 81       |
|    total_timesteps    | 54000    |
| train/                |          |
|    entropy_loss       | -1.02    |
|    explained_variance | -2.91    |
|    learning_rate      | 0.0007   |
|    n_updates          | 10799    |
|    policy_loss        | 0.00153  |
|    value_loss         | 4.12e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 665      |
|    iterations         | 10900    |
|    time_elapsed       | 81       |
|    total_timesteps    | 54500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 651      |
|    iterations         | 12100    |
|    time_elapsed       | 92       |
|    total_timesteps    | 60500    |
| train/                |          |
|    entropy_loss       | -0.954   |
|    explained_variance | -37      |
|    learning_rate      | 0.0007   |
|    n_updates          | 12099    |
|    policy_loss        | 0.000929 |
|    value_loss         | 2.9e-06  |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 651       |
|    iterations         | 12200     |
|    time_elapsed       | 93        |
|    total_timesteps    | 61000     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 648       |
|    iterations         | 13400     |
|    time_elapsed       | 103       |
|    total_timesteps    | 67000     |
| train/                |           |
|    entropy_loss       | -0.421    |
|    explained_variance | -7.11     |
|    learning_rate      | 0.0007    |
|    n_updates          | 13399     |
|    policy_loss        | -0.000449 |
|    value_loss         | 1.35e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 647       |
|    iterations         | 13500     |
|    time_elapsed       | 104       |
|    total_timesteps    | 67500     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 645      |
|    iterations         | 14700    |
|    time_elapsed       | 113      |
|    total_timesteps    | 73500    |
| train/                |          |
|    entropy_loss       | -0.844   |
|    explained_variance | -6.76    |
|    learning_rate      | 0.0007   |
|    n_updates          | 14699    |
|    policy_loss        | -0.00201 |
|    value_loss         | 3.19e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 646      |
|    iterations         | 14800    |
|    time_elapsed       | 114      |
|    total_timesteps    | 74000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 647      |
|    iterations         | 16000    |
|    time_elapsed       | 123      |
|    total_timesteps    | 80000    |
| train/                |          |
|    entropy_loss       | -0.924   |
|    explained_variance | -13.3    |
|    learning_rate      | 0.0007   |
|    n_updates          | 15999    |
|    policy_loss        | 0.000477 |
|    value_loss         | 2.28e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 647      |
|    iterations         | 16100    |
|    time_elapsed       | 124      |
|    total_timesteps    | 80500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 652      |
|    iterations         | 17300    |
|    time_elapsed       | 132      |
|    total_timesteps    | 86500    |
| train/                |          |
|    entropy_loss       | -0.781   |
|    explained_variance | -65.3    |
|    learning_rate      | 0.0007   |
|    n_updates          | 17299    |
|    policy_loss        | 0.00162  |
|    value_loss         | 9.99e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 653       |
|    iterations         | 17400     |
|    time_elapsed       | 133       |
|    total_timesteps    | 87000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 661      |
|    iterations         | 18600    |
|    time_elapsed       | 140      |
|    total_timesteps    | 93000    |
| train/                |          |
|    entropy_loss       | -0.284   |
|    explained_variance | -32.3    |
|    learning_rate      | 0.0007   |
|    n_updates          | 18599    |
|    policy_loss        | 0.000455 |
|    value_loss         | 4.98e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 662      |
|    iterations         | 18700    |
|    time_elapsed       | 141      |
|    total_timesteps    | 93500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 669       |
|    iterations         | 19900     |
|    time_elapsed       | 148       |
|    total_timesteps    | 99500     |
| train/                |           |
|    entropy_loss       | -0.794    |
|    explained_variance | -20.9     |
|    learning_rate      | 0.0007    |
|    n_updates          | 19899     |
|    policy_loss        | -0.000639 |
|    value_loss         | 2.71e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 670       |
|    iterations         | 20000     |
|    time_elapsed       | 149       |
|    total_timesteps    | 100000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 815      |
|    iterations         | 1500     |
|    time_elapsed       | 9        |
|    total_timesteps    | 7500     |
| train/                |          |
|    entropy_loss       | -1.04    |
|    explained_variance | -24.3    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1499     |
|    policy_loss        | 0.0117   |
|    value_loss         | 0.000188 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 816      |
|    iterations         | 1600     |
|    time_elapsed       | 9        |
|    total_timesteps    | 8000     |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 807      |
|    iterations         | 2800     |
|    time_elapsed       | 17       |
|    total_timesteps    | 14000    |
| train/                |          |
|    entropy_loss       | -1.05    |
|    explained_variance | -615     |
|    learning_rate      | 0.0007   |
|    n_updates          | 2799     |
|    policy_loss        | 9e-05    |
|    value_loss         | 8.46e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 805      |
|    iterations         | 2900     |
|    time_elapsed       | 17       |
|    total_timesteps    | 14500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 792       |
|    iterations         | 4100      |
|    time_elapsed       | 25        |
|    total_timesteps    | 20500     |
| train/                |           |
|    entropy_loss       | -0.457    |
|    explained_variance | -181      |
|    learning_rate      | 0.0007    |
|    n_updates          | 4099      |
|    policy_loss        | -0.000509 |
|    value_loss         | 1.54e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 792       |
|    iterations         | 4200      |
|    time_elapsed       | 26        |
|    total_timesteps    | 21000     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 783      |
|    iterations         | 5400     |
|    time_elapsed       | 34       |
|    total_timesteps    | 27000    |
| train/                |          |
|    entropy_loss       | -0.197   |
|    explained_variance | -78.3    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5399     |
|    policy_loss        | -7.9e-05 |
|    value_loss         | 7.08e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 781       |
|    iterations         | 5500      |
|    time_elapsed       | 35        |
|    total_timesteps    | 27500     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 767       |
|    iterations         | 6700      |
|    time_elapsed       | 43        |
|    total_timesteps    | 33500     |
| train/                |           |
|    entropy_loss       | -0.195    |
|    explained_variance | -16.7     |
|    learning_rate      | 0.0007    |
|    n_updates          | 6699      |
|    policy_loss        | -0.000161 |
|    value_loss         | 1.2e-05   |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 767      |
|    iterations         | 6800     |
|    time_elapsed       | 44       |
|    total_timesteps    | 34000    |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 756       |
|    iterations         | 8000      |
|    time_elapsed       | 52        |
|    total_timesteps    | 40000     |
| train/                |           |
|    entropy_loss       | -0.116    |
|    explained_variance | -2.98     |
|    learning_rate      | 0.0007    |
|    n_updates          | 7999      |
|    policy_loss        | -3.81e-05 |
|    value_loss         | 4.1e-06   |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 756      |
|    iterations         | 8100     |
|    time_elapsed       | 53       |
|    total_timesteps    | 40500    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 744      |
|    iterations         | 9300     |
|    time_elapsed       | 62       |
|    total_timesteps    | 46500    |
| train/                |          |
|    entropy_loss       | -0.202   |
|    explained_variance | -2.97    |
|    learning_rate      | 0.0007   |
|    n_updates          | 9299     |
|    policy_loss        | 3.18e-05 |
|    value_loss         | 9.03e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 744       |
|    iterations         | 9400      |
|    time_elapsed       | 63        |
|    total_timesteps    | 47000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 747      |
|    iterations         | 10600    |
|    time_elapsed       | 70       |
|    total_timesteps    | 53000    |
| train/                |          |
|    entropy_loss       | -0.0754  |
|    explained_variance | -12.3    |
|    learning_rate      | 0.0007   |
|    n_updates          | 10599    |
|    policy_loss        | 2.94e-05 |
|    value_loss         | 7.33e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 747       |
|    iterations         | 10700     |
|    time_elapsed       | 71        |
|    total_timesteps    | 53500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 749      |
|    iterations         | 11900    |
|    time_elapsed       | 79       |
|    total_timesteps    | 59500    |
| train/                |          |
|    entropy_loss       | -0.26    |
|    explained_variance | -2.54    |
|    learning_rate      | 0.0007   |
|    n_updates          | 11899    |
|    policy_loss        | 0.000209 |
|    value_loss         | 1.04e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 748      |
|    iterations         | 12000    |
|    time_elapsed       | 80       |
|    total_timesteps    | 60000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 747       |
|    iterations         | 13200     |
|    time_elapsed       | 88        |
|    total_timesteps    | 66000     |
| train/                |           |
|    entropy_loss       | -0.112    |
|    explained_variance | -0.792    |
|    learning_rate      | 0.0007    |
|    n_updates          | 13199     |
|    policy_loss        | -7.84e-06 |
|    value_loss         | 5.45e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 747      |
|    iterations         | 13300    |
|    time_elapsed       | 88       |
|    total_timesteps    | 66500    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 746      |
|    iterations         | 14500    |
|    time_elapsed       | 97       |
|    total_timesteps    | 72500    |
| train/                |          |
|    entropy_loss       | -0.044   |
|    explained_variance | -8.87    |
|    learning_rate      | 0.0007   |
|    n_updates          | 14499    |
|    policy_loss        | 5.98e-06 |
|    value_loss         | 2.66e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 746       |
|    iterations         | 14600     |
|    time_elapsed       | 97        |
|    total_timesteps    | 73000     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 739       |
|    iterations         | 15800     |
|    time_elapsed       | 106       |
|    total_timesteps    | 79000     |
| train/                |           |
|    entropy_loss       | -0.0221   |
|    explained_variance | -45.2     |
|    learning_rate      | 0.0007    |
|    n_updates          | 15799     |
|    policy_loss        | -3.07e-06 |
|    value_loss         | 1.97e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 738       |
|    iterations         | 15900     |
|    time_elapsed       | 107       |
|    total_timesteps    | 79500     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 721      |
|    iterations         | 17100    |
|    time_elapsed       | 118      |
|    total_timesteps    | 85500    |
| train/                |          |
|    entropy_loss       | -0.00638 |
|    explained_variance | -9.88    |
|    learning_rate      | 0.0007   |
|    n_updates          | 17099    |
|    policy_loss        | 6.77e-07 |
|    value_loss         | 1.11e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 721      |
|    iterations         | 17200    |
|    time_elapsed       | 119      |
|    total_timesteps    | 86000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 723      |
|    iterations         | 18400    |
|    time_elapsed       | 127      |
|    total_timesteps    | 92000    |
| train/                |          |
|    entropy_loss       | -0.00544 |
|    explained_variance | -3.06    |
|    learning_rate      | 0.0007   |
|    n_updates          | 18399    |
|    policy_loss        | 1.69e-06 |
|    value_loss         | 1.23e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -7.14    |
| time/                 |          |
|    fps                | 723      |
|    iterations         | 18500    |
|    time_elapsed       | 127      |
|    total_timesteps    | 92500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 726       |
|    iterations         | 19700     |
|    time_elapsed       | 135       |
|    total_timesteps    | 98500     |
| train/                |           |
|    entropy_loss       | -0.0201   |
|    explained_variance | -25.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 19699     |
|    policy_loss        | -9.29e-07 |
|    value_loss         | 1.23e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | -7.14     |
| time/                 |           |
|    fps                | 726       |
|    iterations         | 19800     |
|    time_elapsed       | 136       |
|    total_timesteps    | 99000     |
| train/    

In [4]:
#multiagent ensemble
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO, DQN, A2C
from sklearn.preprocessing import StandardScaler
from collections import Counter

# Function to load and normalize data
def load_and_normalize_data(train_file, test_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
    df_test['timestamp'] = pd.to_datetime(df_test['timestamp'])

    scaler = StandardScaler()
    columns_to_normalize = ['open', 'high', 'low', 'close', 'volume']

    df_train[columns_to_normalize] = scaler.fit_transform(df_train[columns_to_normalize])
    df_test[columns_to_normalize] = scaler.transform(df_test[columns_to_normalize])

    return df_train, df_test, scaler

# Single-Agent Trading Environment
class SingleAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(SingleAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0 = neutral, 1 = long, -1 = short
        self.trades = []
        self.entry_price = 0
        self.scaler = scaler

        # Action space: hold (0), buy (1), sell (2)
        self.action_space = spaces.Discrete(3)

        # Observation space: Stock prices (open, high, low, close, volume)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(window_size, 5), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 0
        self.position = 0
        self.balance = self.initial_balance
        self.trades = []
        self.entry_price = 0
        return self._get_observation(), {}

    def _get_observation(self):
        return self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)

    def step(self, action):
        reward = 0
        current_price = self.data.iloc[self.current_step]['close']

        if action == 1:  # Buy
            if self.position == 0:
                self.position = 1
                self.entry_price = current_price
            elif self.position == -1:
                reward = self.entry_price - current_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        elif action == 2:  # Sell
            if self.position == 0:
                self.position = -1
                self.entry_price = current_price
            elif self.position == 1:
                reward = current_price - self.entry_price
                self.balance += reward
                self.trades.append(reward)
                self.position = 0

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return self._get_observation(), reward, terminated, truncated, {}

# Multi-Agent Trading Environment
class MultiAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None, num_agents=4):
        super(MultiAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.initial_balance = initial_balance
        self.scaler = scaler
        self.num_agents = num_agents
        self.agents = [SingleAgentEnv(data, window_size, initial_balance, scaler) for _ in range(num_agents)]

    def reset(self):
        obs = []
        for agent in self.agents:
            agent_obs, _ = agent.reset()
            obs.append(agent_obs)
        return obs

    def step(self, actions):
        obs, rewards, terminated, truncated, infos = [], [], [], [], []
        for agent, action in zip(self.agents, actions):
            agent_obs, reward, done, truncate, info = agent.step(action)
            obs.append(agent_obs)
            rewards.append(reward)
            terminated.append(done)
            truncated.append(truncate)
            infos.append(info)
        return obs, rewards, any(terminated), any(truncated), infos

# Ensemble model function
def ensemble_predict(actions):
    actions = [int(action) for action in actions]
    action_counts = Counter(actions)
    return action_counts.most_common(1)[0][0]

# Function to calculate metrics for each agent
def calculate_metrics(trades, initial_balance, final_balance):
    total_profit = final_balance - initial_balance
    cumulative_return = (final_balance - initial_balance) / initial_balance
    win_rate = len([trade for trade in trades if trade > 0]) / len(trades) if trades else 0
    gross_profit = sum(trade for trade in trades if trade > 0)
    gross_loss = -sum(trade for trade in trades if trade < 0)
    profit_factor = gross_profit / gross_loss if gross_loss != 0 else np.inf
    returns = np.array(trades)
    sharpe_ratio = np.mean(returns) / np.std(returns) if np.std(returns) != 0 else 0
    downside_std = np.std([min(0, r) for r in returns])
    sortino_ratio = np.mean(returns) / downside_std if downside_std != 0 else 0
    max_drawdown = np.max(np.maximum.accumulate(np.cumsum(trades)) - np.cumsum(trades)) if trades else 0

    metrics = {
        "Total Profit": total_profit,
        "Cumulative Return": cumulative_return,
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Maximum Drawdown": max_drawdown
    }
    return metrics

# Sharpe Ratio-weighted aggregation for combined metrics
def aggregate_metrics_sharpe_weighted(metrics_list):
    positive_metrics = [m for m in metrics_list if m["Sharpe Ratio"] > 0]
    total_sharpe = sum(m["Sharpe Ratio"] for m in positive_metrics)
    if total_sharpe == 0:
        return {metric: 0 for metric in metrics_list[0]}
    
    combined_metrics = {
        "Total Profit": sum(m["Total Profit"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Cumulative Return": sum(m["Cumulative Return"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Win Rate": sum(m["Win Rate"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Profit Factor": sum(m["Profit Factor"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Sharpe Ratio": sum(m["Sharpe Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Sortino Ratio": sum(m["Sortino Ratio"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
        "Maximum Drawdown": sum(m["Maximum Drawdown"] * m["Sharpe Ratio"] / total_sharpe for m in positive_metrics),
    }
    return combined_metrics

# Train and evaluate each agent
def train_and_evaluate():
    train_file = 'NFLX_TRAINING.csv'
    test_file = 'NFLX_TESTING.csv'
    df_train, df_test, scaler = load_and_normalize_data(train_file, test_file)

    training_metrics = []
    ensemble_models = []

    # Train each agent independently
    for i in range(4):  # 4 agents
        env_train = SingleAgentEnv(df_train, window_size=10, scaler=scaler)

        # Initialize each model
        ppo_model = PPO("MlpPolicy", env_train, verbose=1)
        dqn_model = DQN("MlpPolicy", env_train, verbose=1)
        a2c_model = A2C("MlpPolicy", env_train, verbose=1)

        # Train each model
        ppo_model.learn(total_timesteps=50000)
        dqn_model.learn(total_timesteps=50000)
        a2c_model.learn(total_timesteps=50000)

        # Store trained models in a list
        ensemble_models.append((ppo_model, dqn_model, a2c_model))

        # Calculate training metrics
        training_metrics.append(calculate_metrics(env_train.trades, env_train.initial_balance, env_train.balance))

    combined_training_metrics = aggregate_metrics_sharpe_weighted(training_metrics)
    print("\n=== Combined Training Metrics (Sharpe Ratio Weighted) ===")
    for metric, value in combined_training_metrics.items():
        print(f"{metric}: {value}")

    # Test in the multi-agent environment
    env_test = MultiAgentEnv(df_test, window_size=10, scaler=scaler, num_agents=4)
    obs = env_test.reset()
    done = False
    while not done:
        actions = []
        for i, (ppo_model, dqn_model, a2c_model) in enumerate(ensemble_models):
            ppo_action, _ = ppo_model.predict(obs[i])
            dqn_action, _ = dqn_model.predict(obs[i])
            a2c_action, _ = a2c_model.predict(obs[i])
            final_action = ensemble_predict([ppo_action, dqn_action, a2c_action])
            actions.append(final_action)

        obs, rewards, done, truncated, _ = env_test.step(actions)

    # Calculate and display testing metrics
    testing_metrics = []
    for agent in env_test.agents:
        testing_metrics.append(calculate_metrics(agent.trades, agent.initial_balance, agent.balance))

    combined_testing_metrics = aggregate_metrics_sharpe_weighted(testing_metrics)
    print("\n=== Combined Testing Metrics (Sharpe Ratio Weighted) ===")
    for metric, value in combined_testing_metrics.items():
        print(f"{metric}: {value}")

# Run the training and evaluation
train_and_evaluate()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------
| time/              |      |
|    fps             | 1443 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1116        |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.010230035 |
|    clip_fraction        | 0.0791      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -7.41       |
|   

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.14e+03     |
|    ep_rew_mean          | 13.5         |
| time/                   |              |
|    fps                  | 881          |
|    iterations           | 11           |
|    time_elapsed         | 25           |
|    total_timesteps      | 22528        |
| train/                  |              |
|    approx_kl            | 0.0076799933 |
|    clip_fraction        | 0.069        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.896       |
|    explained_variance   | 0.282        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.0244      |
|    n_updates            | 100          |
|    policy_gradient_loss | -0.00944     |
|    value_loss           | 0.00774      |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.14e+03     |
|    ep_rew_mean          | 34.1         |
| time/                   |              |
|    fps                  | 853          |
|    iterations           | 21           |
|    time_elapsed         | 50           |
|    total_timesteps      | 43008        |
| train/                  |              |
|    approx_kl            | 0.0067515243 |
|    clip_fraction        | 0.075        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.683       |
|    explained_variance   | 0.631        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.0436      |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.00802     |
|    value_loss           | 0.00852      |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------
| time/                 |          |
|    fps                | 760      |
|    iterations         | 800      |
|    time_elapsed       | 5        |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -1.06    |
|    explained_variance | -5.94    |
|    learning_rate      | 0.0007   |
|    n_updates          | 799      |
|    policy_loss        | -0.0271  |
|    value_loss         | 0.00863  |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 717      |
|    iterations         | 900      |
|    time_elapsed       | 6        |
|    total_timesteps    | 4500     |
| train/                |          |
|    entropy_loss       | -1.06    |
|    explained_variance | -1.34    |
|    learning_rate      | 0.0007   |
|    n_updates          | 899      |
|    policy_loss        | -0.0564  |
|    value_loss         | 0.00202  |
-

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -2.1     |
| time/                 |          |
|    fps                | 721      |
|    iterations         | 2300     |
|    time_elapsed       | 15       |
|    total_timesteps    | 11500    |
| train/                |          |
|    entropy_loss       | -1.04    |
|    explained_variance | -2.02    |
|    learning_rate      | 0.0007   |
|    n_updates          | 2299     |
|    policy_loss        | -0.0346  |
|    value_loss         | 0.00191  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -2.1     |
| time/                 |          |
|    fps                | 723      |
|    iterations         | 2400     |
|    time_elapsed       | 16       |
|    total_timesteps    | 12000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 0.174    |
| time/                 |          |
|    fps                | 723      |
|    iterations         | 3600     |
|    time_elapsed       | 24       |
|    total_timesteps    | 18000    |
| train/                |          |
|    entropy_loss       | -1.07    |
|    explained_variance | 0.504    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3599     |
|    policy_loss        | 0.0308   |
|    value_loss         | 0.00094  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 0.174    |
| time/                 |          |
|    fps                | 710      |
|    iterations         | 3700     |
|    time_elapsed       | 26       |
|    total_timesteps    | 18500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | 2.23      |
| time/                 |           |
|    fps                | 675       |
|    iterations         | 5000      |
|    time_elapsed       | 36        |
|    total_timesteps    | 25000     |
| train/                |           |
|    entropy_loss       | -1.06     |
|    explained_variance | -5.24e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 4999      |
|    policy_loss        | 0.00773   |
|    value_loss         | 0.000133  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 2.23     |
| time/                 |          |
|    fps                | 677      |
|    iterations         | 5100     |
|    time_elapsed       | 37       |
|    total_timesteps    | 25500    |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | 3.2       |
| time/                 |           |
|    fps                | 699       |
|    iterations         | 6300      |
|    time_elapsed       | 45        |
|    total_timesteps    | 31500     |
| train/                |           |
|    entropy_loss       | -0.468    |
|    explained_variance | -48.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 6299      |
|    policy_loss        | -0.000293 |
|    value_loss         | 1.49e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 3.2      |
| time/                 |          |
|    fps                | 701      |
|    iterations         | 6400     |
|    time_elapsed       | 45       |
|    total_timesteps    | 32000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 4.47     |
| time/                 |          |
|    fps                | 712      |
|    iterations         | 7600     |
|    time_elapsed       | 53       |
|    total_timesteps    | 38000    |
| train/                |          |
|    entropy_loss       | -0.739   |
|    explained_variance | -48.5    |
|    learning_rate      | 0.0007   |
|    n_updates          | 7599     |
|    policy_loss        | -0.0375  |
|    value_loss         | 0.00221  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 4.47     |
| time/                 |          |
|    fps                | 712      |
|    iterations         | 7700     |
|    time_elapsed       | 54       |
|    total_timesteps    | 38500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 6.3      |
| time/                 |          |
|    fps                | 723      |
|    iterations         | 8900     |
|    time_elapsed       | 61       |
|    total_timesteps    | 44500    |
| train/                |          |
|    entropy_loss       | -0.0171  |
|    explained_variance | -42.4    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8899     |
|    policy_loss        | -0.00161 |
|    value_loss         | 7.6e-07  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 6.3      |
| time/                 |          |
|    fps                | 724      |
|    iterations         | 9000     |
|    time_elapsed       | 62       |
|    total_timesteps    | 45000    |
| train/                |          |
|

-----------------------------------------
| time/                   |             |
|    fps                  | 976         |
|    iterations           | 3           |
|    time_elapsed         | 6           |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.007123075 |
|    clip_fraction        | 0.044       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | 0.0305      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.025      |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.00847    |
|    value_loss           | 0.0121      |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.14e+03     |
|    ep_rew_mean          | 2.19         |
| time/                   |              |
|    fps                  | 9

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_mean          | 19.5        |
| time/                   |             |
|    fps                  | 902         |
|    iterations           | 13          |
|    time_elapsed         | 29          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.009798544 |
|    clip_fraction        | 0.102       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.927      |
|    explained_variance   | 0.657       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0231     |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.0121     |
|    value_loss           | 0.00898     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_mean          | 40.9        |
| time/                   |             |
|    fps                  | 893         |
|    iterations           | 23          |
|    time_elapsed         | 52          |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.008285653 |
|    clip_fraction        | 0.133       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.726      |
|    explained_variance   | 0.68        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00325     |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.00599    |
|    value_loss           | 0.0125      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+

------------------------------------
| time/                 |          |
|    fps                | 712      |
|    iterations         | 1100     |
|    time_elapsed       | 7        |
|    total_timesteps    | 5500     |
| train/                |          |
|    entropy_loss       | -0.197   |
|    explained_variance | -36.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1099     |
|    policy_loss        | 0.000295 |
|    value_loss         | 0.00046  |
------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 714       |
|    iterations         | 1200      |
|    time_elapsed       | 8         |
|    total_timesteps    | 6000      |
| train/                |           |
|    entropy_loss       | -0.185    |
|    explained_variance | -7.91     |
|    learning_rate      | 0.0007    |
|    n_updates          | 1199      |
|    policy_loss        | -0.000617 |
|    value_loss         | 

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -2.49    |
| time/                 |          |
|    fps                | 720      |
|    iterations         | 2500     |
|    time_elapsed       | 17       |
|    total_timesteps    | 12500    |
| train/                |          |
|    entropy_loss       | -1.03    |
|    explained_variance | -0.685   |
|    learning_rate      | 0.0007   |
|    n_updates          | 2499     |
|    policy_loss        | -0.0258  |
|    value_loss         | 0.00415  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -2.49    |
| time/                 |          |
|    fps                | 720      |
|    iterations         | 2600     |
|    time_elapsed       | 18       |
|    total_timesteps    | 13000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -0.464   |
| time/                 |          |
|    fps                | 732      |
|    iterations         | 3800     |
|    time_elapsed       | 25       |
|    total_timesteps    | 19000    |
| train/                |          |
|    entropy_loss       | -0.242   |
|    explained_variance | -3.49    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3799     |
|    policy_loss        | 0.000424 |
|    value_loss         | 6.09e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | -0.464   |
| time/                 |          |
|    fps                | 732      |
|    iterations         | 3900     |
|    time_elapsed       | 26       |
|    total_timesteps    | 19500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 0.159    |
| time/                 |          |
|    fps                | 738      |
|    iterations         | 5100     |
|    time_elapsed       | 34       |
|    total_timesteps    | 25500    |
| train/                |          |
|    entropy_loss       | -0.411   |
|    explained_variance | -0.637   |
|    learning_rate      | 0.0007   |
|    n_updates          | 5099     |
|    policy_loss        | 0.00448  |
|    value_loss         | 6.92e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 0.159    |
| time/                 |          |
|    fps                | 739      |
|    iterations         | 5200     |
|    time_elapsed       | 35       |
|    total_timesteps    | 26000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | 0.515     |
| time/                 |           |
|    fps                | 748       |
|    iterations         | 6400      |
|    time_elapsed       | 42        |
|    total_timesteps    | 32000     |
| train/                |           |
|    entropy_loss       | -0.155    |
|    explained_variance | -3.2e+04  |
|    learning_rate      | 0.0007    |
|    n_updates          | 6399      |
|    policy_loss        | -2.18e-06 |
|    value_loss         | 1.19e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 0.515    |
| time/                 |          |
|    fps                | 749      |
|    iterations         | 6500     |
|    time_elapsed       | 43       |
|    total_timesteps    | 32500    |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | 1.22      |
| time/                 |           |
|    fps                | 758       |
|    iterations         | 7700      |
|    time_elapsed       | 50        |
|    total_timesteps    | 38500     |
| train/                |           |
|    entropy_loss       | -0.043    |
|    explained_variance | -11.5     |
|    learning_rate      | 0.0007    |
|    n_updates          | 7699      |
|    policy_loss        | -3.37e-07 |
|    value_loss         | 2.74e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | 1.22      |
| time/                 |           |
|    fps                | 759       |
|    iterations         | 7800      |
|    time_elapsed       | 51        |
|    total_timesteps    | 39000     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 1.57     |
| time/                 |          |
|    fps                | 766      |
|    iterations         | 9000     |
|    time_elapsed       | 58       |
|    total_timesteps    | 45000    |
| train/                |          |
|    entropy_loss       | -0.306   |
|    explained_variance | 0.273    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8999     |
|    policy_loss        | 0.0805   |
|    value_loss         | 0.0071   |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | 1.57      |
| time/                 |           |
|    fps                | 766       |
|    iterations         | 9100      |
|    time_elapsed       | 59        |
|    total_timesteps    | 45500     |
| train/                |    

-----------------------------------------
| time/                   |             |
|    fps                  | 950         |
|    iterations           | 3           |
|    time_elapsed         | 6           |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.006798896 |
|    clip_fraction        | 0.0414      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | 0.266       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0378     |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.00859    |
|    value_loss           | 0.0168      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_mean          | -1.44       |
| time/                   |             |
|    fps                  | 936   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_mean          | 15.2        |
| time/                   |             |
|    fps                  | 884         |
|    iterations           | 13          |
|    time_elapsed         | 30          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.008035535 |
|    clip_fraction        | 0.088       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.923      |
|    explained_variance   | 0.686       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00602    |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.0134     |
|    value_loss           | 0.00993     |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 7.14e+03

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_mean          | 34          |
| time/                   |             |
|    fps                  | 885         |
|    iterations           | 23          |
|    time_elapsed         | 53          |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.010192019 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.816      |
|    explained_variance   | 0.648       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00812     |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.0103     |
|    value_loss           | 0.0129      |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.14

------------------------------------
| time/                 |          |
|    fps                | 764      |
|    iterations         | 1100     |
|    time_elapsed       | 7        |
|    total_timesteps    | 5500     |
| train/                |          |
|    entropy_loss       | -0.829   |
|    explained_variance | -102     |
|    learning_rate      | 0.0007   |
|    n_updates          | 1099     |
|    policy_loss        | 0.0241   |
|    value_loss         | 0.000663 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 766      |
|    iterations         | 1200     |
|    time_elapsed       | 7        |
|    total_timesteps    | 6000     |
| train/                |          |
|    entropy_loss       | -0.877   |
|    explained_variance | -0.239   |
|    learning_rate      | 0.0007   |
|    n_updates          | 1199     |
|    policy_loss        | -0.00829 |
|    value_loss         | 0.00021  |
-

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 4.67     |
| time/                 |          |
|    fps                | 686      |
|    iterations         | 2500     |
|    time_elapsed       | 18       |
|    total_timesteps    | 12500    |
| train/                |          |
|    entropy_loss       | -0.531   |
|    explained_variance | -1.31    |
|    learning_rate      | 0.0007   |
|    n_updates          | 2499     |
|    policy_loss        | -0.00893 |
|    value_loss         | 0.00151  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 4.67     |
| time/                 |          |
|    fps                | 689      |
|    iterations         | 2600     |
|    time_elapsed       | 18       |
|    total_timesteps    | 13000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 4.82     |
| time/                 |          |
|    fps                | 657      |
|    iterations         | 3800     |
|    time_elapsed       | 28       |
|    total_timesteps    | 19000    |
| train/                |          |
|    entropy_loss       | -1.05    |
|    explained_variance | -0.0325  |
|    learning_rate      | 0.0007   |
|    n_updates          | 3799     |
|    policy_loss        | -0.0283  |
|    value_loss         | 0.00116  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 4.82     |
| time/                 |          |
|    fps                | 659      |
|    iterations         | 3900     |
|    time_elapsed       | 29       |
|    total_timesteps    | 19500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 3.28     |
| time/                 |          |
|    fps                | 663      |
|    iterations         | 5100     |
|    time_elapsed       | 38       |
|    total_timesteps    | 25500    |
| train/                |          |
|    entropy_loss       | -0.962   |
|    explained_variance | 0.181    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5099     |
|    policy_loss        | 0.00732  |
|    value_loss         | 7.4e-05  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 3.28     |
| time/                 |          |
|    fps                | 665      |
|    iterations         | 5200     |
|    time_elapsed       | 39       |
|    total_timesteps    | 26000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | 2.23      |
| time/                 |           |
|    fps                | 676       |
|    iterations         | 6400      |
|    time_elapsed       | 47        |
|    total_timesteps    | 32000     |
| train/                |           |
|    entropy_loss       | -0.581    |
|    explained_variance | -2.33     |
|    learning_rate      | 0.0007    |
|    n_updates          | 6399      |
|    policy_loss        | -0.000164 |
|    value_loss         | 2.15e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 2.23     |
| time/                 |          |
|    fps                | 677      |
|    iterations         | 6500     |
|    time_elapsed       | 47       |
|    total_timesteps    | 32500    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 2.11     |
| time/                 |          |
|    fps                | 667      |
|    iterations         | 7700     |
|    time_elapsed       | 57       |
|    total_timesteps    | 38500    |
| train/                |          |
|    entropy_loss       | -0.0157  |
|    explained_variance | -5.71    |
|    learning_rate      | 0.0007   |
|    n_updates          | 7699     |
|    policy_loss        | 2.43e-05 |
|    value_loss         | 0.000133 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | 2.11      |
| time/                 |           |
|    fps                | 668       |
|    iterations         | 7800      |
|    time_elapsed       | 58        |
|    total_timesteps    | 39000     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | 1.75      |
| time/                 |           |
|    fps                | 674       |
|    iterations         | 9000      |
|    time_elapsed       | 66        |
|    total_timesteps    | 45000     |
| train/                |           |
|    entropy_loss       | -0.00771  |
|    explained_variance | -431      |
|    learning_rate      | 0.0007    |
|    n_updates          | 8999      |
|    policy_loss        | -7.66e-06 |
|    value_loss         | 6.86e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 1.75     |
| time/                 |          |
|    fps                | 675      |
|    iterations         | 9100     |
|    time_elapsed       | 67       |
|    total_timesteps    | 45500    |
| train/             

-----------------------------------------
| time/                   |             |
|    fps                  | 982         |
|    iterations           | 3           |
|    time_elapsed         | 6           |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.008902987 |
|    clip_fraction        | 0.0758      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | 0.503       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00958     |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.00962    |
|    value_loss           | 0.0145      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_mean          | -0.011      |
| time/                   |             |
|    fps                  | 957   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_mean          | 15.6        |
| time/                   |             |
|    fps                  | 919         |
|    iterations           | 13          |
|    time_elapsed         | 28          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.008978034 |
|    clip_fraction        | 0.0945      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.924      |
|    explained_variance   | 0.561       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00174     |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.0145     |
|    value_loss           | 0.00812     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+03    |
|    ep_rew_mean          | 35.3        |
| time/                   |             |
|    fps                  | 968         |
|    iterations           | 23          |
|    time_elapsed         | 48          |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.007483241 |
|    clip_fraction        | 0.101       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.754      |
|    explained_variance   | 0.486       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0106     |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.00539    |
|    value_loss           | 0.0113      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14e+

------------------------------------
| time/                 |          |
|    fps                | 792      |
|    iterations         | 1100     |
|    time_elapsed       | 6        |
|    total_timesteps    | 5500     |
| train/                |          |
|    entropy_loss       | -1.02    |
|    explained_variance | 0.162    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1099     |
|    policy_loss        | -0.121   |
|    value_loss         | 0.0118   |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 792      |
|    iterations         | 1200     |
|    time_elapsed       | 7        |
|    total_timesteps    | 6000     |
| train/                |          |
|    entropy_loss       | -1.09    |
|    explained_variance | -0.526   |
|    learning_rate      | 0.0007   |
|    n_updates          | 1199     |
|    policy_loss        | -0.0103  |
|    value_loss         | 0.000194 |
-

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 3.78     |
| time/                 |          |
|    fps                | 786      |
|    iterations         | 2500     |
|    time_elapsed       | 15       |
|    total_timesteps    | 12500    |
| train/                |          |
|    entropy_loss       | -1.03    |
|    explained_variance | -0.876   |
|    learning_rate      | 0.0007   |
|    n_updates          | 2499     |
|    policy_loss        | -0.0496  |
|    value_loss         | 0.00569  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 3.78     |
| time/                 |          |
|    fps                | 786      |
|    iterations         | 2600     |
|    time_elapsed       | 16       |
|    total_timesteps    | 13000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 4.03     |
| time/                 |          |
|    fps                | 786      |
|    iterations         | 3800     |
|    time_elapsed       | 24       |
|    total_timesteps    | 19000    |
| train/                |          |
|    entropy_loss       | -0.365   |
|    explained_variance | -3.42    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3799     |
|    policy_loss        | 1.01e-05 |
|    value_loss         | 2.25e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 4.03     |
| time/                 |          |
|    fps                | 786      |
|    iterations         | 3900     |
|    time_elapsed       | 24       |
|    total_timesteps    | 19500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 3.78     |
| time/                 |          |
|    fps                | 786      |
|    iterations         | 5100     |
|    time_elapsed       | 32       |
|    total_timesteps    | 25500    |
| train/                |          |
|    entropy_loss       | -0.0995  |
|    explained_variance | -0.498   |
|    learning_rate      | 0.0007   |
|    n_updates          | 5099     |
|    policy_loss        | -5.5e-05 |
|    value_loss         | 1.05e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 3.78     |
| time/                 |          |
|    fps                | 786      |
|    iterations         | 5200     |
|    time_elapsed       | 33       |
|    total_timesteps    | 26000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 3.19     |
| time/                 |          |
|    fps                | 788      |
|    iterations         | 6400     |
|    time_elapsed       | 40       |
|    total_timesteps    | 32000    |
| train/                |          |
|    entropy_loss       | -0.558   |
|    explained_variance | -2.61    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6399     |
|    policy_loss        | 0.00403  |
|    value_loss         | 0.000105 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.14e+03  |
|    ep_rew_mean        | 3.19      |
| time/                 |           |
|    fps                | 788       |
|    iterations         | 6500      |
|    time_elapsed       | 41        |
|    total_timesteps    | 32500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 2.77     |
| time/                 |          |
|    fps                | 789      |
|    iterations         | 7700     |
|    time_elapsed       | 48       |
|    total_timesteps    | 38500    |
| train/                |          |
|    entropy_loss       | -0.564   |
|    explained_variance | -0.00949 |
|    learning_rate      | 0.0007   |
|    n_updates          | 7699     |
|    policy_loss        | -0.00515 |
|    value_loss         | 0.00023  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 2.77     |
| time/                 |          |
|    fps                | 789      |
|    iterations         | 7800     |
|    time_elapsed       | 49       |
|    total_timesteps    | 39000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 3.31     |
| time/                 |          |
|    fps                | 791      |
|    iterations         | 9000     |
|    time_elapsed       | 56       |
|    total_timesteps    | 45000    |
| train/                |          |
|    entropy_loss       | -0.677   |
|    explained_variance | -0.0694  |
|    learning_rate      | 0.0007   |
|    n_updates          | 8999     |
|    policy_loss        | -0.0218  |
|    value_loss         | 0.00586  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.14e+03 |
|    ep_rew_mean        | 3.31     |
| time/                 |          |
|    fps                | 792      |
|    iterations         | 9100     |
|    time_elapsed       | 57       |
|    total_timesteps    | 45500    |
| train/                |          |
|