In [1]:
#ppo algorithm single agent
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from sklearn.preprocessing import StandardScaler

# Function to load and normalize data
def load_and_normalize_data(train_file, test_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
    df_test['timestamp'] = pd.to_datetime(df_test['timestamp'])

    scaler = StandardScaler()
    columns_to_normalize = ['open', 'high', 'low', 'close', 'volume']

    df_train[columns_to_normalize] = scaler.fit_transform(df_train[columns_to_normalize])
    df_test[columns_to_normalize] = scaler.transform(df_test[columns_to_normalize])

    return df_train, df_test, scaler

# Single-Agent Trading Environment
class SingleAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(SingleAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0 = neutral, 1 = long, -1 = short
        self.trades = []
        self.entry_price = 0
        self.log = []  # Log for detailed reporting
        self.scaler = scaler  # Store the scaler for inverse scaling

        # Action space: hold (0), buy (1), sell (2)
        self.action_space = spaces.Discrete(3)

        # Observation space: Stock prices (open, high, low, close, volume)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(window_size, 5), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 0
        self.position = 0
        self.balance = self.initial_balance
        self.trades = []
        self.entry_price = 0
        self.log = []  # Reset log

        # Log initial holdings
        self.log.append(f"Agent starts with 0 holdings (neutral position), Initial Balance: {self.balance}")
        return self._get_observation(), {}

    def _get_observation(self):
        obs = self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)
        return obs

    def inverse_scale_price(self, price):
        # Inverse transform the scaled price to get the original value (considering the 'close' column)
        inverse_scaled = self.scaler.inverse_transform([[0, 0, 0, price, 0]])[0][3]
        return inverse_scaled

    def step(self, action):
        reward = 0
        current_price = self.data.iloc[self.current_step]['close']
        original_price = self.inverse_scale_price(current_price)  # Get original (inverse-scaled) price

        # If agent buys
        if action == 1:
            if self.position == 0:  # Only buy if neutral
                self.position = 1
                self.entry_price = current_price
                original_entry_price = self.inverse_scale_price(current_price)
                self.log.append(f"Agent buys at {original_entry_price}, Current Balance: {self.balance}, Holdings: 1 Long")
            elif self.position == -1:  # Close short position
                reward = self.entry_price - current_price  # Scaled reward
                original_reward = self.inverse_scale_price(self.entry_price) - original_price
                self.balance += original_reward
                self.position = 0
                self.trades.append(original_reward)
                self.log.append(f"Agent closes short at {original_price}, profit: {original_reward}, Current Balance: {self.balance}, Holdings: 0")

        # If agent sells
        elif action == 2:
            if self.position == 0:  # Only sell if neutral
                self.position = -1
                self.entry_price = current_price
                original_entry_price = self.inverse_scale_price(current_price)
                self.log.append(f"Agent sells (short) at {original_entry_price}, Current Balance: {self.balance}, Holdings: 1 Short")
            elif self.position == 1:  # Close long position
                reward = current_price - self.entry_price  # Scaled reward
                original_reward = original_price - self.inverse_scale_price(self.entry_price)
                self.balance += original_reward
                self.position = 0
                self.trades.append(original_reward)
                self.log.append(f"Agent closes long at {original_price}, profit: {original_reward}, Current Balance: {self.balance}, Holdings: 0")

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return self._get_observation(), reward, terminated, truncated, {}

    def generate_report(self):
        print("\n--- Agent Report ---")
        for log in self.log:
            print(log)
        print(f"Final Balance: {self.balance}")
        print(f"Total Profit: {self.balance - self.initial_balance}")
        print(f"Number of Holdings (Long): {1 if self.position == 1 else 0}, Short: {1 if self.position == -1 else 0}")
        print("-" * 40)

# Function to calculate additional metrics
def calculate_metrics(trades, initial_balance, final_balance):
    # Total Profit
    total_profit = final_balance - initial_balance

    # Cumulative Return
    cumulative_return = (final_balance - initial_balance) / initial_balance

    # Win Rate
    positive_trades = [trade for trade in trades if trade > 0]
    win_rate = len(positive_trades) / len(trades) if trades else 0

    # Profit Factor
    gross_profit = sum(trade for trade in trades if trade > 0)
    gross_loss = -sum(trade for trade in trades if trade < 0)
    profit_factor = gross_profit / gross_loss if gross_loss != 0 else np.inf

    # Sharpe Ratio
    returns = np.array(trades)
    mean_return = np.mean(returns)
    std_return = np.std(returns)
    sharpe_ratio = mean_return / std_return if std_return != 0 else 0

    # Sortino Ratio (using only downside standard deviation)
    downside_std = np.std([min(0, r) for r in returns])
    sortino_ratio = mean_return / downside_std if downside_std != 0 else 0

    # Maximum Drawdown
    balance_series = np.cumsum([initial_balance] + trades)  # Series of balance over time
    peak_balance = np.maximum.accumulate(balance_series)
    drawdowns = (peak_balance - balance_series) / peak_balance
    max_drawdown = np.max(drawdowns) if drawdowns.size > 0 else 0

    metrics = {
        "Total Profit": total_profit,
        "Cumulative Return": cumulative_return,
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Maximum Drawdown": max_drawdown
    }
    return metrics

# Train and evaluate the model with all metrics
def train_and_evaluate():
    # Load and normalize the data
    train_file = 'NVDA_TRAINING.csv'
    test_file = 'NVDA_TESTING.csv'
    df_train_normalized, df_test_normalized, scaler = load_and_normalize_data(train_file, test_file)

    # Create the environment using the training data
    env_train = SingleAgentEnv(df_train_normalized, window_size=10, scaler=scaler)

    # Initialize the PPO model and train
    model = PPO("MlpPolicy", env_train, verbose=1)
    model.learn(total_timesteps=100000)

    # Test the model on the training data
    obs, _ = env_train.reset()
    done = False
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, truncated, info = env_train.step(action)

    # Generate report for the training session
    env_train.generate_report()

    # Calculate and display metrics for the training period
    training_metrics = calculate_metrics(env_train.trades, env_train.initial_balance, env_train.balance)
    print("\n--- Training Metrics ---")
    for metric, value in training_metrics.items():
        print(f"{metric}: {value}")

    # Test the model on the testing data
    env_test = SingleAgentEnv(df_test_normalized, window_size=10, scaler=scaler)
    obs, _ = env_test.reset()
    done = False
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, truncated, info = env_test.step(action)

    # Generate report for the testing session
    env_test.generate_report()

    # Calculate and display metrics for the testing period
    testing_metrics = calculate_metrics(env_test.trades, env_test.initial_balance, env_test.balance)
    print("\n--- Testing Metrics ---")
    for metric, value in testing_metrics.items():
        print(f"{metric}: {value}")

# Run the training and evaluation
train_and_evaluate()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------
| time/              |      |
|    fps             | 1865 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1497        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.006927577 |
|    clip_fraction        | 0.0344      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -2.71       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0168     |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00649    |
|    value_loss         

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.93e+03     |
|    ep_rew_mean          | 0.86         |
| time/                   |              |
|    fps                  | 1339         |
|    iterations           | 12           |
|    time_elapsed         | 18           |
|    total_timesteps      | 24576        |
| train/                  |              |
|    approx_kl            | 0.0072234278 |
|    clip_fraction        | 0.0651       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.02        |
|    explained_variance   | 0.277        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.001        |
|    n_updates            | 110          |
|    policy_gradient_loss | -0.00967     |
|    value_loss           | 0.00133      |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.93e+03    |
|    ep_rew_mean          | 4.01        |
| time/                   |             |
|    fps                  | 1331        |
|    iterations           | 22          |
|    time_elapsed         | 33          |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.011513409 |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.958      |
|    explained_variance   | 0.143       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0128      |
|    n_updates            | 210         |
|    policy_gradient_loss | -0.0145     |
|    value_loss           | 0.000954    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.93e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.93e+03    |
|    ep_rew_mean          | 9.5         |
| time/                   |             |
|    fps                  | 1330        |
|    iterations           | 32          |
|    time_elapsed         | 49          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.014328163 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.744      |
|    explained_variance   | 0.628       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00581    |
|    n_updates            | 310         |
|    policy_gradient_loss | -0.0032     |
|    value_loss           | 0.00723     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.93e+

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.93e+03     |
|    ep_rew_mean          | 12.4         |
| time/                   |              |
|    fps                  | 1329         |
|    iterations           | 42           |
|    time_elapsed         | 64           |
|    total_timesteps      | 86016        |
| train/                  |              |
|    approx_kl            | 0.0067829196 |
|    clip_fraction        | 0.0928       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.815       |
|    explained_variance   | 0.319        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.0107      |
|    n_updates            | 410          |
|    policy_gradient_loss | -0.00718     |
|    value_loss           | 0.0014       |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m


--- Agent Report ---
Agent starts with 0 holdings (neutral position), Initial Balance: 10000
Agent buys at 29.793, Current Balance: 10000, Holdings: 1 Long
Agent closes long at 29.938, profit: 0.14499999999999957, Current Balance: 10000.145, Holdings: 0
Agent buys at 29.945, Current Balance: 10000.145, Holdings: 1 Long
Agent closes long at 30.095800000000004, profit: 0.15080000000000382, Current Balance: 10000.2958, Holdings: 0
Agent sells (short) at 30.512, Current Balance: 10000.2958, Holdings: 1 Short
Agent closes short at 30.196, profit: 0.31599999999999895, Current Balance: 10000.6118, Holdings: 0
Agent buys at 29.964, Current Balance: 10000.6118, Holdings: 1 Long
Agent closes long at 30.134, profit: 0.1700000000000017, Current Balance: 10000.7818, Holdings: 0
Agent sells (short) at 30.075, Current Balance: 10000.7818, Holdings: 1 Short
Agent closes short at 28.627000000000002, profit: 1.4479999999999968, Current Balance: 10002.229800000001, Holdings: 0
Agent buys at 28.7, Curren

In [2]:
#dqn algorithm single agent
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import DQN
from sklearn.preprocessing import StandardScaler

# Function to load and normalize data
def load_and_normalize_data(train_file, test_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
    df_test['timestamp'] = pd.to_datetime(df_test['timestamp'])

    scaler = StandardScaler()
    columns_to_normalize = ['open', 'high', 'low', 'close', 'volume']

    df_train[columns_to_normalize] = scaler.fit_transform(df_train[columns_to_normalize])
    df_test[columns_to_normalize] = scaler.transform(df_test[columns_to_normalize])

    return df_train, df_test, scaler

# Single-Agent Trading Environment
class SingleAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(SingleAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0 = neutral, 1 = long, -1 = short
        self.trades = []
        self.entry_price = 0
        self.log = []  # Log for detailed reporting
        self.scaler = scaler  # Store the scaler for inverse scaling

        # Action space: hold (0), buy (1), sell (2)
        self.action_space = spaces.Discrete(3)

        # Observation space: Stock prices (open, high, low, close, volume)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(window_size, 5), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 0
        self.position = 0
        self.balance = self.initial_balance
        self.trades = []
        self.entry_price = 0
        self.log = []  # Reset log

        # Log initial holdings
        self.log.append(f"Agent starts with 0 holdings (neutral position), Initial Balance: {self.balance}")
        return self._get_observation(), {}

    def _get_observation(self):
        obs = self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)
        return obs

    def inverse_scale_price(self, price):
        # Inverse transform the scaled price to get the original value (considering the 'close' column)
        inverse_scaled = self.scaler.inverse_transform([[0, 0, 0, price, 0]])[0][3]
        return inverse_scaled

    def step(self, action):
        reward = 0
        current_price = self.data.iloc[self.current_step]['close']
        original_price = self.inverse_scale_price(current_price)  # Get original (inverse-scaled) price

        # If agent buys
        if action == 1:
            if self.position == 0:  # Only buy if neutral
                self.position = 1
                self.entry_price = current_price
                original_entry_price = self.inverse_scale_price(current_price)
                self.log.append(f"Agent buys at {original_entry_price}, Current Balance: {self.balance}, Holdings: 1 Long")
            elif self.position == -1:  # Close short position
                reward = self.entry_price - current_price  # Scaled reward
                original_reward = self.inverse_scale_price(self.entry_price) - original_price
                self.balance += original_reward
                self.position = 0
                self.trades.append(original_reward)
                self.log.append(f"Agent closes short at {original_price}, profit: {original_reward}, Current Balance: {self.balance}, Holdings: 0")

        # If agent sells
        elif action == 2:
            if self.position == 0:  # Only sell if neutral
                self.position = -1
                self.entry_price = current_price
                original_entry_price = self.inverse_scale_price(current_price)
                self.log.append(f"Agent sells (short) at {original_entry_price}, Current Balance: {self.balance}, Holdings: 1 Short")
            elif self.position == 1:  # Close long position
                reward = current_price - self.entry_price  # Scaled reward
                original_reward = original_price - self.inverse_scale_price(self.entry_price)
                self.balance += original_reward
                self.position = 0
                self.trades.append(original_reward)
                self.log.append(f"Agent closes long at {original_price}, profit: {original_reward}, Current Balance: {self.balance}, Holdings: 0")

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return self._get_observation(), reward, terminated, truncated, {}

    def generate_report(self):
        print("\n--- Agent Report ---")
        for log in self.log:
            print(log)
        print(f"Final Balance: {self.balance}")
        print(f"Total Profit: {self.balance - self.initial_balance}")
        print(f"Number of Holdings (Long): {1 if self.position == 1 else 0}, Short: {1 if self.position == -1 else 0}")
        print("-" * 40)

# Function to calculate additional metrics
def calculate_metrics(trades, initial_balance, final_balance):
    # Total Profit
    total_profit = final_balance - initial_balance

    # Cumulative Return
    cumulative_return = (final_balance - initial_balance) / initial_balance

    # Win Rate
    positive_trades = [trade for trade in trades if trade > 0]
    win_rate = len(positive_trades) / len(trades) if trades else 0

    # Profit Factor
    gross_profit = sum(trade for trade in trades if trade > 0)
    gross_loss = -sum(trade for trade in trades if trade < 0)
    profit_factor = gross_profit / gross_loss if gross_loss != 0 else np.inf

    # Sharpe Ratio
    returns = np.array(trades)
    mean_return = np.mean(returns)
    std_return = np.std(returns)
    sharpe_ratio = mean_return / std_return if std_return != 0 else 0

    # Sortino Ratio (using only downside standard deviation)
    downside_std = np.std([min(0, r) for r in returns])
    sortino_ratio = mean_return / downside_std if downside_std != 0 else 0

    # Maximum Drawdown
    balance_series = np.cumsum([initial_balance] + trades)  # Series of balance over time
    peak_balance = np.maximum.accumulate(balance_series)
    drawdowns = (peak_balance - balance_series) / peak_balance
    max_drawdown = np.max(drawdowns) if drawdowns.size > 0 else 0

    metrics = {
        "Total Profit": total_profit,
        "Cumulative Return": cumulative_return,
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Maximum Drawdown": max_drawdown
    }
    return metrics

# Train and evaluate the model with all metrics
def train_and_evaluate():
    # Load and normalize the data
    train_file = 'NVDA_TRAINING.csv'
    test_file = 'NVDA_TESTING.csv'
    df_train_normalized, df_test_normalized, scaler = load_and_normalize_data(train_file, test_file)

    # Create the environment using the training data
    env_train = SingleAgentEnv(df_train_normalized, window_size=10, scaler=scaler)

    # Initialize the DQN model and train
    model = DQN("MlpPolicy", env_train, verbose=1)
    model.learn(total_timesteps=100000)

    # Test the model on the training data
    obs, _ = env_train.reset()
    done = False
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, truncated, info = env_train.step(action)

    # Generate report for the training session
    env_train.generate_report()

    # Calculate and display metrics for the training period
    training_metrics = calculate_metrics(env_train.trades, env_train.initial_balance, env_train.balance)
    print("\n--- Training Metrics ---")
    for metric, value in training_metrics.items():
        print(f"{metric}: {value}")

    # Test the model on the testing data
    env_test = SingleAgentEnv(df_test_normalized, window_size=10, scaler=scaler)
    obs, _ = env_test.reset()
    done = False
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, truncated, info = env_test.step(action)

    # Generate report for the testing session
    env_test.generate_report()

    # Calculate and display metrics for the testing period
    testing_metrics = calculate_metrics(env_test.trades, env_test.initial_balance, env_test.balance)
    print("\n--- Testing Metrics ---")
    for metric, value in testing_metrics.items():
        print(f"{metric}: {value}")

# Run the training and evaluation
train_and_evaluate()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 7.93e+03 |
|    ep_rew_mean      | 0.392    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1288     |
|    time_elapsed     | 24       |
|    total_timesteps  | 31732    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00038  |
|    n_updates        | 7907     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 7.93e+03 |
|    ep_rew_mean      | -0.413   |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1275     |
|    time_elapsed     | 49       |
|    total_timesteps  | 63464    |
| train/              |        


--- Agent Report ---
Agent starts with 0 holdings (neutral position), Initial Balance: 10000
Agent sells (short) at 29.938, Current Balance: 10000, Holdings: 1 Short
Agent closes short at 29.945, profit: -0.0070000000000014495, Current Balance: 9999.993, Holdings: 0
Agent sells (short) at 29.882999999999996, Current Balance: 9999.993, Holdings: 1 Short
Agent closes short at 28.519, profit: 1.3639999999999972, Current Balance: 10001.357, Holdings: 0
Agent sells (short) at 28.525, Current Balance: 10001.357, Holdings: 1 Short
Agent closes short at 28.5055, profit: 0.019499999999997186, Current Balance: 10001.3765, Holdings: 0
Agent buys at 28.196, Current Balance: 10001.3765, Holdings: 1 Long
Agent closes long at 28.37, profit: 0.1739999999999995, Current Balance: 10001.550500000001, Holdings: 0
Agent sells (short) at 27.945, Current Balance: 10001.550500000001, Holdings: 1 Short
Agent closes short at 27.964, profit: -0.01899999999999835, Current Balance: 10001.531500000001, Holdings: 0

In [3]:
#a2c algorithm single agent
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import A2C
from sklearn.preprocessing import StandardScaler

# Function to load and normalize data
def load_and_normalize_data(train_file, test_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
    df_test['timestamp'] = pd.to_datetime(df_test['timestamp'])

    scaler = StandardScaler()
    columns_to_normalize = ['open', 'high', 'low', 'close', 'volume']

    df_train[columns_to_normalize] = scaler.fit_transform(df_train[columns_to_normalize])
    df_test[columns_to_normalize] = scaler.transform(df_test[columns_to_normalize])

    return df_train, df_test, scaler

# Single-Agent Trading Environment
class SingleAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(SingleAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0 = neutral, 1 = long, -1 = short
        self.trades = []
        self.entry_price = 0
        self.log = []  # Log for detailed reporting
        self.scaler = scaler  # Store the scaler for inverse scaling

        # Action space: hold (0), buy (1), sell (2)
        self.action_space = spaces.Discrete(3)

        # Observation space: Stock prices (open, high, low, close, volume)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(window_size, 5), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 0
        self.position = 0
        self.balance = self.initial_balance
        self.trades = []
        self.entry_price = 0
        self.log = []  # Reset log

        # Log initial holdings
        self.log.append(f"Agent starts with 0 holdings (neutral position), Initial Balance: {self.balance}")
        return self._get_observation(), {}

    def _get_observation(self):
        obs = self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)
        return obs

    def inverse_scale_price(self, price):
        # Inverse transform the scaled price to get the original value (considering the 'close' column)
        inverse_scaled = self.scaler.inverse_transform([[0, 0, 0, price, 0]])[0][3]
        return inverse_scaled

    def step(self, action):
        reward = 0
        current_price = self.data.iloc[self.current_step]['close']
        original_price = self.inverse_scale_price(current_price)  # Get original (inverse-scaled) price

        # If agent buys
        if action == 1:
            if self.position == 0:  # Only buy if neutral
                self.position = 1
                self.entry_price = current_price
                original_entry_price = self.inverse_scale_price(current_price)
                self.log.append(f"Agent buys at {original_entry_price}, Current Balance: {self.balance}, Holdings: 1 Long")
            elif self.position == -1:  # Close short position
                reward = self.entry_price - current_price  # Scaled reward
                original_reward = self.inverse_scale_price(self.entry_price) - original_price
                self.balance += original_reward
                self.position = 0
                self.trades.append(original_reward)
                self.log.append(f"Agent closes short at {original_price}, profit: {original_reward}, Current Balance: {self.balance}, Holdings: 0")

        # If agent sells
        elif action == 2:
            if self.position == 0:  # Only sell if neutral
                self.position = -1
                self.entry_price = current_price
                original_entry_price = self.inverse_scale_price(current_price)
                self.log.append(f"Agent sells (short) at {original_entry_price}, Current Balance: {self.balance}, Holdings: 1 Short")
            elif self.position == 1:  # Close long position
                reward = current_price - self.entry_price  # Scaled reward
                original_reward = original_price - self.inverse_scale_price(self.entry_price)
                self.balance += original_reward
                self.position = 0
                self.trades.append(original_reward)
                self.log.append(f"Agent closes long at {original_price}, profit: {original_reward}, Current Balance: {self.balance}, Holdings: 0")

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return self._get_observation(), reward, terminated, truncated, {}

    def generate_report(self):
        print("\n--- Agent Report ---")
        for log in self.log:
            print(log)
        print(f"Final Balance: {self.balance}")
        print(f"Total Profit: {self.balance - self.initial_balance}")
        print(f"Number of Holdings (Long): {1 if self.position == 1 else 0}, Short: {1 if self.position == -1 else 0}")
        print("-" * 40)

# Function to calculate additional metrics
def calculate_metrics(trades, initial_balance, final_balance):
    # Total Profit
    total_profit = final_balance - initial_balance

    # Cumulative Return
    cumulative_return = (final_balance - initial_balance) / initial_balance

    # Win Rate
    positive_trades = [trade for trade in trades if trade > 0]
    win_rate = len(positive_trades) / len(trades) if trades else 0

    # Profit Factor
    gross_profit = sum(trade for trade in trades if trade > 0)
    gross_loss = -sum(trade for trade in trades if trade < 0)
    profit_factor = gross_profit / gross_loss if gross_loss != 0 else np.inf

    # Sharpe Ratio
    returns = np.array(trades)
    mean_return = np.mean(returns)
    std_return = np.std(returns)
    sharpe_ratio = mean_return / std_return if std_return != 0 else 0

    # Sortino Ratio (using only downside standard deviation)
    downside_std = np.std([min(0, r) for r in returns])
    sortino_ratio = mean_return / downside_std if downside_std != 0 else 0

    # Maximum Drawdown
    balance_series = np.cumsum([initial_balance] + trades)  # Series of balance over time
    peak_balance = np.maximum.accumulate(balance_series)
    drawdowns = (peak_balance - balance_series) / peak_balance
    max_drawdown = np.max(drawdowns) if drawdowns.size > 0 else 0

    metrics = {
        "Total Profit": total_profit,
        "Cumulative Return": cumulative_return,
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Maximum Drawdown": max_drawdown
    }
    return metrics

# Train and evaluate the model with action logging for debugging
def train_and_evaluate():
    # Load and normalize the data
    train_file = 'NVDA_TRAINING.csv'
    test_file = 'NVDA_TESTING.csv'
    df_train_normalized, df_test_normalized, scaler = load_and_normalize_data(train_file, test_file)

    # Create the environment using the training data
    env_train = SingleAgentEnv(df_train_normalized, window_size=10, scaler=scaler)

    # Initialize the A2C model and train with more timesteps
    model = A2C("MlpPolicy", env_train, verbose=1)
    model.learn(total_timesteps=500000)  # Increased timesteps

    # Testing on the training data with action logging
    obs, _ = env_train.reset()
    done = False
    print("\n--- Training Session ---")
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, truncated, info = env_train.step(action)
        # Log each action and reward for debugging
        print(f"Step: {env_train.current_step}, Action: {action}, Reward: {reward}")

    # Generate report and metrics for the training session
    env_train.generate_report()
    training_metrics = calculate_metrics(env_train.trades, env_train.initial_balance, env_train.balance)
    print("\n--- Training Metrics ---")
    for metric, value in training_metrics.items():
        print(f"{metric}: {value}")

    # Testing on the testing data with action logging
    env_test = SingleAgentEnv(df_test_normalized, window_size=10, scaler=scaler)
    obs, _ = env_test.reset()
    done = False
    print("\n--- Testing Session ---")
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, truncated, info = env_test.step(action)
        # Log each action and reward for debugging
        print(f"Step: {env_test.current_step}, Action: {action}, Reward: {reward}")

    # Generate report and metrics for the testing session
    env_test.generate_report()
    testing_metrics = calculate_metrics(env_test.trades, env_test.initial_balance, env_test.balance)
    print("\n--- Testing Metrics ---")
    for metric, value in testing_metrics.items():
        print(f"{metric}: {value}")



# Run the training and evaluation
train_and_evaluate()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-------------------------------------
| time/                 |           |
|    fps                | 886       |
|    iterations         | 100       |
|    time_elapsed       | 0         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -0.978    |
|    explained_variance | -7.35e+04 |
|    learning_rate      | 0.0007    |
|    n_updates          | 99        |
|    policy_loss        | -0.115    |
|    value_loss         | 0.0243    |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 935      |
|    iterations         | 200      |
|    time_elapsed       | 1        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.491   |
|    explained_variance | -0.525   |
|    learning_rate      | 0.0007   |
|   

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.419    |
| time/                 |          |
|    fps                | 974      |
|    iterations         | 1700     |
|    time_elapsed       | 8        |
|    total_timesteps    | 8500     |
| train/                |          |
|    entropy_loss       | -0.602   |
|    explained_variance | -21.4    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1699     |
|    policy_loss        | -0.0111  |
|    value_loss         | 0.000998 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.419    |
| time/                 |          |
|    fps                | 975      |
|    iterations         | 1800     |
|    time_elapsed       | 9        |
|    total_timesteps    | 9000     |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.419    |
| time/                 |          |
|    fps                | 981      |
|    iterations         | 3000     |
|    time_elapsed       | 15       |
|    total_timesteps    | 15000    |
| train/                |          |
|    entropy_loss       | -0.45    |
|    explained_variance | -2.84    |
|    learning_rate      | 0.0007   |
|    n_updates          | 2999     |
|    policy_loss        | -0.0011  |
|    value_loss         | 2.37e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.419     |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 3100      |
|    time_elapsed       | 15        |
|    total_timesteps    | 15500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.866    |
| time/                 |          |
|    fps                | 986      |
|    iterations         | 4300     |
|    time_elapsed       | 21       |
|    total_timesteps    | 21500    |
| train/                |          |
|    entropy_loss       | -0.373   |
|    explained_variance | -383     |
|    learning_rate      | 0.0007   |
|    n_updates          | 4299     |
|    policy_loss        | 0.000858 |
|    value_loss         | 8.51e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.866     |
| time/                 |           |
|    fps                | 987       |
|    iterations         | 4400      |
|    time_elapsed       | 22        |
|    total_timesteps    | 22000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 1.05     |
| time/                 |          |
|    fps                | 987      |
|    iterations         | 5600     |
|    time_elapsed       | 28       |
|    total_timesteps    | 28000    |
| train/                |          |
|    entropy_loss       | -0.478   |
|    explained_variance | 0.538    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5599     |
|    policy_loss        | 0.00942  |
|    value_loss         | 5.86e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 1.05      |
| time/                 |           |
|    fps                | 987       |
|    iterations         | 5700      |
|    time_elapsed       | 28        |
|    total_timesteps    | 28500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.596    |
| time/                 |          |
|    fps                | 988      |
|    iterations         | 6900     |
|    time_elapsed       | 34       |
|    total_timesteps    | 34500    |
| train/                |          |
|    entropy_loss       | -0.448   |
|    explained_variance | -6.57    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6899     |
|    policy_loss        | -0.0105  |
|    value_loss         | 0.000152 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.596     |
| time/                 |           |
|    fps                | 989       |
|    iterations         | 7000      |
|    time_elapsed       | 35        |
|    total_timesteps    | 35000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.551    |
| time/                 |          |
|    fps                | 990      |
|    iterations         | 8200     |
|    time_elapsed       | 41       |
|    total_timesteps    | 41000    |
| train/                |          |
|    entropy_loss       | -0.155   |
|    explained_variance | -0.15    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8199     |
|    policy_loss        | 1.68e-05 |
|    value_loss         | 2.07e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.551    |
| time/                 |          |
|    fps                | 990      |
|    iterations         | 8300     |
|    time_elapsed       | 41       |
|    total_timesteps    | 41500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.551    |
| time/                 |          |
|    fps                | 938      |
|    iterations         | 9500     |
|    time_elapsed       | 50       |
|    total_timesteps    | 47500    |
| train/                |          |
|    entropy_loss       | -0.0586  |
|    explained_variance | -0.919   |
|    learning_rate      | 0.0007   |
|    n_updates          | 9499     |
|    policy_loss        | 1.05e-06 |
|    value_loss         | 2.58e-08 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.0359    |
| time/                 |           |
|    fps                | 938       |
|    iterations         | 9600      |
|    time_elapsed       | 51        |
|    total_timesteps    | 48000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.0359   |
| time/                 |          |
|    fps                | 933      |
|    iterations         | 10800    |
|    time_elapsed       | 57       |
|    total_timesteps    | 54000    |
| train/                |          |
|    entropy_loss       | -0.0221  |
|    explained_variance | -0.0903  |
|    learning_rate      | 0.0007   |
|    n_updates          | 10799    |
|    policy_loss        | 2.21e-06 |
|    value_loss         | 5.59e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.0359   |
| time/                 |          |
|    fps                | 933      |
|    iterations         | 10900    |
|    time_elapsed       | 58       |
|    total_timesteps    | 54500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.0281  |
| time/                 |          |
|    fps                | 938      |
|    iterations         | 12100    |
|    time_elapsed       | 64       |
|    total_timesteps    | 60500    |
| train/                |          |
|    entropy_loss       | -0.482   |
|    explained_variance | -1.37    |
|    learning_rate      | 0.0007   |
|    n_updates          | 12099    |
|    policy_loss        | 0.00176  |
|    value_loss         | 4.95e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.0281   |
| time/                 |           |
|    fps                | 939       |
|    iterations         | 12200     |
|    time_elapsed       | 64        |
|    total_timesteps    | 61000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.0399   |
| time/                 |          |
|    fps                | 943      |
|    iterations         | 13400    |
|    time_elapsed       | 71       |
|    total_timesteps    | 67000    |
| train/                |          |
|    entropy_loss       | -0.387   |
|    explained_variance | -0.082   |
|    learning_rate      | 0.0007   |
|    n_updates          | 13399    |
|    policy_loss        | 0.051    |
|    value_loss         | 0.00191  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.0399   |
| time/                 |          |
|    fps                | 943      |
|    iterations         | 13500    |
|    time_elapsed       | 71       |
|    total_timesteps    | 67500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.233   |
| time/                 |          |
|    fps                | 946      |
|    iterations         | 14700    |
|    time_elapsed       | 77       |
|    total_timesteps    | 73500    |
| train/                |          |
|    entropy_loss       | -0.71    |
|    explained_variance | -0.00176 |
|    learning_rate      | 0.0007   |
|    n_updates          | 14699    |
|    policy_loss        | 0.0095   |
|    value_loss         | 0.00198  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.233   |
| time/                 |          |
|    fps                | 946      |
|    iterations         | 14800    |
|    time_elapsed       | 78       |
|    total_timesteps    | 74000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.199   |
| time/                 |          |
|    fps                | 949      |
|    iterations         | 16000    |
|    time_elapsed       | 84       |
|    total_timesteps    | 80000    |
| train/                |          |
|    entropy_loss       | -0.214   |
|    explained_variance | -0.0287  |
|    learning_rate      | 0.0007   |
|    n_updates          | 15999    |
|    policy_loss        | -0.00992 |
|    value_loss         | 0.000142 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.199    |
| time/                 |           |
|    fps                | 949       |
|    iterations         | 16100     |
|    time_elapsed       | 84        |
|    total_timesteps    | 80500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.199   |
| time/                 |          |
|    fps                | 951      |
|    iterations         | 17300    |
|    time_elapsed       | 90       |
|    total_timesteps    | 86500    |
| train/                |          |
|    entropy_loss       | -0.492   |
|    explained_variance | -630     |
|    learning_rate      | 0.0007   |
|    n_updates          | 17299    |
|    policy_loss        | 4.55e-05 |
|    value_loss         | 4.93e-09 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.199    |
| time/                 |           |
|    fps                | 952       |
|    iterations         | 17400     |
|    time_elapsed       | 91        |
|    total_timesteps    | 87000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.0819  |
| time/                 |          |
|    fps                | 954      |
|    iterations         | 18600    |
|    time_elapsed       | 97       |
|    total_timesteps    | 93000    |
| train/                |          |
|    entropy_loss       | -0.477   |
|    explained_variance | -0.766   |
|    learning_rate      | 0.0007   |
|    n_updates          | 18599    |
|    policy_loss        | 6.54e-05 |
|    value_loss         | 6.56e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.0819  |
| time/                 |          |
|    fps                | 954      |
|    iterations         | 18700    |
|    time_elapsed       | 97       |
|    total_timesteps    | 93500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.00665  |
| time/                 |           |
|    fps                | 955       |
|    iterations         | 19900     |
|    time_elapsed       | 104       |
|    total_timesteps    | 99500     |
| train/                |           |
|    entropy_loss       | -0.00542  |
|    explained_variance | 0.238     |
|    learning_rate      | 0.0007    |
|    n_updates          | 19899     |
|    policy_loss        | -4.02e-06 |
|    value_loss         | 1.85e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.00665 |
| time/                 |          |
|    fps                | 955      |
|    iterations         | 20000    |
|    time_elapsed       | 104      |
|    total_timesteps    | 100000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.0192    |
| time/                 |           |
|    fps                | 957       |
|    iterations         | 21200     |
|    time_elapsed       | 110       |
|    total_timesteps    | 106000    |
| train/                |           |
|    entropy_loss       | -0.00205  |
|    explained_variance | -2.96e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 21199     |
|    policy_loss        | -2.26e-07 |
|    value_loss         | 3.37e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.0192    |
| time/                 |           |
|    fps                | 957       |
|    iterations         | 21300     |
|    time_elapsed       | 111       |
|    total_timesteps    | 106500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.0317    |
| time/                 |           |
|    fps                | 959       |
|    iterations         | 22500     |
|    time_elapsed       | 117       |
|    total_timesteps    | 112500    |
| train/                |           |
|    entropy_loss       | -0.000836 |
|    explained_variance | -0.103    |
|    learning_rate      | 0.0007    |
|    n_updates          | 22499     |
|    policy_loss        | 6.91e-08  |
|    value_loss         | 1.01e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.0317    |
| time/                 |           |
|    fps                | 959       |
|    iterations         | 22600     |
|    time_elapsed       | 117       |
|    total_timesteps    | 113000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.0829    |
| time/                 |           |
|    fps                | 960       |
|    iterations         | 23800     |
|    time_elapsed       | 123       |
|    total_timesteps    | 119000    |
| train/                |           |
|    entropy_loss       | -0.0188   |
|    explained_variance | -3.27e+13 |
|    learning_rate      | 0.0007    |
|    n_updates          | 23799     |
|    policy_loss        | -3.25e-07 |
|    value_loss         | 1.45e-08  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.0829   |
| time/                 |          |
|    fps                | 960      |
|    iterations         | 23900    |
|    time_elapsed       | 124      |
|    total_timesteps    | 119500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.0829    |
| time/                 |           |
|    fps                | 962       |
|    iterations         | 25100     |
|    time_elapsed       | 130       |
|    total_timesteps    | 125500    |
| train/                |           |
|    entropy_loss       | -0.0118   |
|    explained_variance | -8.19     |
|    learning_rate      | 0.0007    |
|    n_updates          | 25099     |
|    policy_loss        | -2.37e-07 |
|    value_loss         | 3.34e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.0829    |
| time/                 |           |
|    fps                | 962       |
|    iterations         | 25200     |
|    time_elapsed       | 130       |
|    total_timesteps    | 126000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.127    |
| time/                 |          |
|    fps                | 963      |
|    iterations         | 26400    |
|    time_elapsed       | 136      |
|    total_timesteps    | 132000   |
| train/                |          |
|    entropy_loss       | -0.0467  |
|    explained_variance | -5.92    |
|    learning_rate      | 0.0007   |
|    n_updates          | 26399    |
|    policy_loss        | 1.6e-06  |
|    value_loss         | 8.89e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.127    |
| time/                 |          |
|    fps                | 963      |
|    iterations         | 26500    |
|    time_elapsed       | 137      |
|    total_timesteps    | 132500   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.197     |
| time/                 |           |
|    fps                | 963       |
|    iterations         | 27700     |
|    time_elapsed       | 143       |
|    total_timesteps    | 138500    |
| train/                |           |
|    entropy_loss       | -0.000995 |
|    explained_variance | -8.32     |
|    learning_rate      | 0.0007    |
|    n_updates          | 27699     |
|    policy_loss        | 2.19e-07  |
|    value_loss         | 5.64e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.197     |
| time/                 |           |
|    fps                | 963       |
|    iterations         | 27800     |
|    time_elapsed       | 144       |
|    total_timesteps    | 139000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.239    |
| time/                 |          |
|    fps                | 965      |
|    iterations         | 29000    |
|    time_elapsed       | 150      |
|    total_timesteps    | 145000   |
| train/                |          |
|    entropy_loss       | -0.0011  |
|    explained_variance | 0.268    |
|    learning_rate      | 0.0007   |
|    n_updates          | 28999    |
|    policy_loss        | 2.29e-08 |
|    value_loss         | 4.95e-08 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.239    |
| time/                 |          |
|    fps                | 965      |
|    iterations         | 29100    |
|    time_elapsed       | 150      |
|    total_timesteps    | 145500   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.278    |
| time/                 |          |
|    fps                | 966      |
|    iterations         | 30300    |
|    time_elapsed       | 156      |
|    total_timesteps    | 151500   |
| train/                |          |
|    entropy_loss       | -0.00113 |
|    explained_variance | -0.0244  |
|    learning_rate      | 0.0007   |
|    n_updates          | 30299    |
|    policy_loss        | 6.03e-08 |
|    value_loss         | 3.59e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.278     |
| time/                 |           |
|    fps                | 966       |
|    iterations         | 30400     |
|    time_elapsed       | 157       |
|    total_timesteps    | 152000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.278    |
| time/                 |          |
|    fps                | 967      |
|    iterations         | 31600    |
|    time_elapsed       | 163      |
|    total_timesteps    | 158000   |
| train/                |          |
|    entropy_loss       | -0.00531 |
|    explained_variance | -7.79    |
|    learning_rate      | 0.0007   |
|    n_updates          | 31599    |
|    policy_loss        | 4.38e-09 |
|    value_loss         | 7.13e-11 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.278    |
| time/                 |          |
|    fps                | 967      |
|    iterations         | 31700    |
|    time_elapsed       | 163      |
|    total_timesteps    | 158500   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.32      |
| time/                 |           |
|    fps                | 968       |
|    iterations         | 32900     |
|    time_elapsed       | 169       |
|    total_timesteps    | 164500    |
| train/                |           |
|    entropy_loss       | -0.00814  |
|    explained_variance | -5.59e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 32899     |
|    policy_loss        | -6.68e-07 |
|    value_loss         | 4.85e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.32     |
| time/                 |          |
|    fps                | 968      |
|    iterations         | 33000    |
|    time_elapsed       | 170      |
|    total_timesteps    | 165000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.361    |
| time/                 |          |
|    fps                | 969      |
|    iterations         | 34200    |
|    time_elapsed       | 176      |
|    total_timesteps    | 171000   |
| train/                |          |
|    entropy_loss       | -0.00143 |
|    explained_variance | -50.1    |
|    learning_rate      | 0.0007   |
|    n_updates          | 34199    |
|    policy_loss        | 2.26e-06 |
|    value_loss         | 0.000234 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.361    |
| time/                 |          |
|    fps                | 969      |
|    iterations         | 34300    |
|    time_elapsed       | 176      |
|    total_timesteps    | 171500   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.359    |
| time/                 |          |
|    fps                | 970      |
|    iterations         | 35500    |
|    time_elapsed       | 182      |
|    total_timesteps    | 177500   |
| train/                |          |
|    entropy_loss       | -0.115   |
|    explained_variance | -35.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 35499    |
|    policy_loss        | 0.000602 |
|    value_loss         | 3.28e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.359     |
| time/                 |           |
|    fps                | 970       |
|    iterations         | 35600     |
|    time_elapsed       | 183       |
|    total_timesteps    | 178000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.364    |
| time/                 |          |
|    fps                | 971      |
|    iterations         | 36800    |
|    time_elapsed       | 189      |
|    total_timesteps    | 184000   |
| train/                |          |
|    entropy_loss       | -0.0038  |
|    explained_variance | -23.1    |
|    learning_rate      | 0.0007   |
|    n_updates          | 36799    |
|    policy_loss        | 1.19e-07 |
|    value_loss         | 1.38e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.364     |
| time/                 |           |
|    fps                | 971       |
|    iterations         | 36900     |
|    time_elapsed       | 189       |
|    total_timesteps    | 184500    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.35     |
| time/                 |          |
|    fps                | 972      |
|    iterations         | 38100    |
|    time_elapsed       | 195      |
|    total_timesteps    | 190500   |
| train/                |          |
|    entropy_loss       | -0.00274 |
|    explained_variance | -38      |
|    learning_rate      | 0.0007   |
|    n_updates          | 38099    |
|    policy_loss        | 2.12e-08 |
|    value_loss         | 5.42e-09 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.35      |
| time/                 |           |
|    fps                | 972       |
|    iterations         | 38200     |
|    time_elapsed       | 196       |
|    total_timesteps    | 191000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.35     |
| time/                 |          |
|    fps                | 973      |
|    iterations         | 39400    |
|    time_elapsed       | 202      |
|    total_timesteps    | 197000   |
| train/                |          |
|    entropy_loss       | -0.0121  |
|    explained_variance | -307     |
|    learning_rate      | 0.0007   |
|    n_updates          | 39399    |
|    policy_loss        | 9.04e-08 |
|    value_loss         | 8.03e-09 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.35      |
| time/                 |           |
|    fps                | 973       |
|    iterations         | 39500     |
|    time_elapsed       | 202       |
|    total_timesteps    | 197500    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.363    |
| time/                 |          |
|    fps                | 973      |
|    iterations         | 40700    |
|    time_elapsed       | 208      |
|    total_timesteps    | 203500   |
| train/                |          |
|    entropy_loss       | -0.208   |
|    explained_variance | -39.6    |
|    learning_rate      | 0.0007   |
|    n_updates          | 40699    |
|    policy_loss        | 0.000142 |
|    value_loss         | 1.09e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.363    |
| time/                 |          |
|    fps                | 973      |
|    iterations         | 40800    |
|    time_elapsed       | 209      |
|    total_timesteps    | 204000   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.372     |
| time/                 |           |
|    fps                | 974       |
|    iterations         | 42000     |
|    time_elapsed       | 215       |
|    total_timesteps    | 210000    |
| train/                |           |
|    entropy_loss       | -0.00386  |
|    explained_variance | -1.22     |
|    learning_rate      | 0.0007    |
|    n_updates          | 41999     |
|    policy_loss        | -9.39e-07 |
|    value_loss         | 5.05e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.372     |
| time/                 |           |
|    fps                | 974       |
|    iterations         | 42100     |
|    time_elapsed       | 216       |
|    total_timesteps    | 210500    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.384    |
| time/                 |          |
|    fps                | 975      |
|    iterations         | 43300    |
|    time_elapsed       | 222      |
|    total_timesteps    | 216500   |
| train/                |          |
|    entropy_loss       | -0.0852  |
|    explained_variance | -0.0685  |
|    learning_rate      | 0.0007   |
|    n_updates          | 43299    |
|    policy_loss        | 0.000292 |
|    value_loss         | 9.88e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.384    |
| time/                 |          |
|    fps                | 975      |
|    iterations         | 43400    |
|    time_elapsed       | 222      |
|    total_timesteps    | 217000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.344    |
| time/                 |          |
|    fps                | 975      |
|    iterations         | 44600    |
|    time_elapsed       | 228      |
|    total_timesteps    | 223000   |
| train/                |          |
|    entropy_loss       | -0.0599  |
|    explained_variance | 0.0458   |
|    learning_rate      | 0.0007   |
|    n_updates          | 44599    |
|    policy_loss        | 3.18e-05 |
|    value_loss         | 3.26e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.344    |
| time/                 |          |
|    fps                | 975      |
|    iterations         | 44700    |
|    time_elapsed       | 229      |
|    total_timesteps    | 223500   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.344     |
| time/                 |           |
|    fps                | 975       |
|    iterations         | 45900     |
|    time_elapsed       | 235       |
|    total_timesteps    | 229500    |
| train/                |           |
|    entropy_loss       | -0.0554   |
|    explained_variance | -17.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 45899     |
|    policy_loss        | -1.15e-08 |
|    value_loss         | 1.31e-11  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.344    |
| time/                 |          |
|    fps                | 975      |
|    iterations         | 46000    |
|    time_elapsed       | 235      |
|    total_timesteps    | 230000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.293     |
| time/                 |           |
|    fps                | 976       |
|    iterations         | 47200     |
|    time_elapsed       | 241       |
|    total_timesteps    | 236000    |
| train/                |           |
|    entropy_loss       | -0.0131   |
|    explained_variance | -253      |
|    learning_rate      | 0.0007    |
|    n_updates          | 47199     |
|    policy_loss        | -2.07e-08 |
|    value_loss         | 1e-08     |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.293    |
| time/                 |          |
|    fps                | 976      |
|    iterations         | 47300    |
|    time_elapsed       | 242      |
|    total_timesteps    | 236500   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.237    |
| time/                 |          |
|    fps                | 976      |
|    iterations         | 48500    |
|    time_elapsed       | 248      |
|    total_timesteps    | 242500   |
| train/                |          |
|    entropy_loss       | -0.0141  |
|    explained_variance | -0.609   |
|    learning_rate      | 0.0007   |
|    n_updates          | 48499    |
|    policy_loss        | 7.97e-06 |
|    value_loss         | 1.83e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.237     |
| time/                 |           |
|    fps                | 976       |
|    iterations         | 48600     |
|    time_elapsed       | 248       |
|    total_timesteps    | 243000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.187     |
| time/                 |           |
|    fps                | 976       |
|    iterations         | 49800     |
|    time_elapsed       | 254       |
|    total_timesteps    | 249000    |
| train/                |           |
|    entropy_loss       | -1.98e-05 |
|    explained_variance | -17.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 49799     |
|    policy_loss        | -2.19e-10 |
|    value_loss         | 4.85e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.187     |
| time/                 |           |
|    fps                | 976       |
|    iterations         | 49900     |
|    time_elapsed       | 255       |
|    total_timesteps    | 249500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.139     |
| time/                 |           |
|    fps                | 977       |
|    iterations         | 51100     |
|    time_elapsed       | 261       |
|    total_timesteps    | 255500    |
| train/                |           |
|    entropy_loss       | -1.2e-05  |
|    explained_variance | 0.01      |
|    learning_rate      | 0.0007    |
|    n_updates          | 51099     |
|    policy_loss        | -4.37e-10 |
|    value_loss         | 2.52e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.139     |
| time/                 |           |
|    fps                | 977       |
|    iterations         | 51200     |
|    time_elapsed       | 261       |
|    total_timesteps    | 256000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.0734    |
| time/                 |           |
|    fps                | 977       |
|    iterations         | 52400     |
|    time_elapsed       | 267       |
|    total_timesteps    | 262000    |
| train/                |           |
|    entropy_loss       | -1.53e-05 |
|    explained_variance | -20.1     |
|    learning_rate      | 0.0007    |
|    n_updates          | 52399     |
|    policy_loss        | 4.91e-11  |
|    value_loss         | 4.24e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.0734    |
| time/                 |           |
|    fps                | 977       |
|    iterations         | 52500     |
|    time_elapsed       | 268       |
|    total_timesteps    | 262500    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.0734   |
| time/                 |          |
|    fps                | 978      |
|    iterations         | 53700    |
|    time_elapsed       | 274      |
|    total_timesteps    | 268500   |
| train/                |          |
|    entropy_loss       | -0.00117 |
|    explained_variance | -29.1    |
|    learning_rate      | 0.0007   |
|    n_updates          | 53699    |
|    policy_loss        | 1.57e-09 |
|    value_loss         | 1.48e-09 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.0734   |
| time/                 |          |
|    fps                | 978      |
|    iterations         | 53800    |
|    time_elapsed       | 275      |
|    total_timesteps    | 269000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.0329   |
| time/                 |          |
|    fps                | 978      |
|    iterations         | 55000    |
|    time_elapsed       | 281      |
|    total_timesteps    | 275000   |
| train/                |          |
|    entropy_loss       | -2.2e-05 |
|    explained_variance | -312     |
|    learning_rate      | 0.0007   |
|    n_updates          | 54999    |
|    policy_loss        | 5.34e-09 |
|    value_loss         | 4.77e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.0329    |
| time/                 |           |
|    fps                | 978       |
|    iterations         | 55100     |
|    time_elapsed       | 281       |
|    total_timesteps    | 275500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.00769  |
| time/                 |           |
|    fps                | 978       |
|    iterations         | 56300     |
|    time_elapsed       | 287       |
|    total_timesteps    | 281500    |
| train/                |           |
|    entropy_loss       | -1.47e-05 |
|    explained_variance | -120      |
|    learning_rate      | 0.0007    |
|    n_updates          | 56299     |
|    policy_loss        | 1e-09     |
|    value_loss         | 2.32e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.00769  |
| time/                 |           |
|    fps                | 978       |
|    iterations         | 56400     |
|    time_elapsed       | 288       |
|    total_timesteps    | 282000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.00747  |
| time/                 |           |
|    fps                | 978       |
|    iterations         | 57600     |
|    time_elapsed       | 294       |
|    total_timesteps    | 288000    |
| train/                |           |
|    entropy_loss       | -1.52e-05 |
|    explained_variance | -0.4      |
|    learning_rate      | 0.0007    |
|    n_updates          | 57599     |
|    policy_loss        | 6.42e-10  |
|    value_loss         | 6.46e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.00747  |
| time/                 |           |
|    fps                | 978       |
|    iterations         | 57700     |
|    time_elapsed       | 294       |
|    total_timesteps    | 288500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.0452   |
| time/                 |           |
|    fps                | 979       |
|    iterations         | 58900     |
|    time_elapsed       | 300       |
|    total_timesteps    | 294500    |
| train/                |           |
|    entropy_loss       | -3.93e-05 |
|    explained_variance | -0.242    |
|    learning_rate      | 0.0007    |
|    n_updates          | 58899     |
|    policy_loss        | 1.32e-09  |
|    value_loss         | 1.78e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.0452   |
| time/                 |           |
|    fps                | 979       |
|    iterations         | 59000     |
|    time_elapsed       | 301       |
|    total_timesteps    | 295000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.0452   |
| time/                 |           |
|    fps                | 979       |
|    iterations         | 60200     |
|    time_elapsed       | 307       |
|    total_timesteps    | 301000    |
| train/                |           |
|    entropy_loss       | -0.000283 |
|    explained_variance | -14.5     |
|    learning_rate      | 0.0007    |
|    n_updates          | 60199     |
|    policy_loss        | 1.2e-10   |
|    value_loss         | 6.02e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.0947   |
| time/                 |           |
|    fps                | 979       |
|    iterations         | 60300     |
|    time_elapsed       | 307       |
|    total_timesteps    | 301500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.0947   |
| time/                 |           |
|    fps                | 979       |
|    iterations         | 61500     |
|    time_elapsed       | 313       |
|    total_timesteps    | 307500    |
| train/                |           |
|    entropy_loss       | -0.000298 |
|    explained_variance | -48.4     |
|    learning_rate      | 0.0007    |
|    n_updates          | 61499     |
|    policy_loss        | 1.71e-09  |
|    value_loss         | 2.59e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.0947   |
| time/                 |           |
|    fps                | 979       |
|    iterations         | 61600     |
|    time_elapsed       | 314       |
|    total_timesteps    | 308000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.0923   |
| time/                 |           |
|    fps                | 980       |
|    iterations         | 62800     |
|    time_elapsed       | 320       |
|    total_timesteps    | 314000    |
| train/                |           |
|    entropy_loss       | -1.78e-05 |
|    explained_variance | -326      |
|    learning_rate      | 0.0007    |
|    n_updates          | 62799     |
|    policy_loss        | 4.57e-10  |
|    value_loss         | 7.82e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.0923   |
| time/                 |           |
|    fps                | 980       |
|    iterations         | 62900     |
|    time_elapsed       | 320       |
|    total_timesteps    | 314500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.125    |
| time/                 |           |
|    fps                | 980       |
|    iterations         | 64100     |
|    time_elapsed       | 326       |
|    total_timesteps    | 320500    |
| train/                |           |
|    entropy_loss       | -1.34e-05 |
|    explained_variance | -51       |
|    learning_rate      | 0.0007    |
|    n_updates          | 64099     |
|    policy_loss        | 1.42e-10  |
|    value_loss         | 3.33e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.125   |
| time/                 |          |
|    fps                | 979      |
|    iterations         | 64200    |
|    time_elapsed       | 327      |
|    total_timesteps    | 321000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.155    |
| time/                 |           |
|    fps                | 980       |
|    iterations         | 65400     |
|    time_elapsed       | 333       |
|    total_timesteps    | 327000    |
| train/                |           |
|    entropy_loss       | -1.84e-05 |
|    explained_variance | -1.08     |
|    learning_rate      | 0.0007    |
|    n_updates          | 65399     |
|    policy_loss        | -7.46e-10 |
|    value_loss         | 9.33e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.155    |
| time/                 |           |
|    fps                | 980       |
|    iterations         | 65500     |
|    time_elapsed       | 334       |
|    total_timesteps    | 327500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.151    |
| time/                 |           |
|    fps                | 980       |
|    iterations         | 66700     |
|    time_elapsed       | 340       |
|    total_timesteps    | 333500    |
| train/                |           |
|    entropy_loss       | -0.000128 |
|    explained_variance | -5.76     |
|    learning_rate      | 0.0007    |
|    n_updates          | 66699     |
|    policy_loss        | 2.05e-10  |
|    value_loss         | 1.2e-08   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.151    |
| time/                 |           |
|    fps                | 980       |
|    iterations         | 66800     |
|    time_elapsed       | 340       |
|    total_timesteps    | 334000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.151    |
| time/                 |           |
|    fps                | 980       |
|    iterations         | 68000     |
|    time_elapsed       | 346       |
|    total_timesteps    | 340000    |
| train/                |           |
|    entropy_loss       | -0.000401 |
|    explained_variance | -33.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 67999     |
|    policy_loss        | 1.57e-09  |
|    value_loss         | 2.04e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.151    |
| time/                 |           |
|    fps                | 980       |
|    iterations         | 68100     |
|    time_elapsed       | 347       |
|    total_timesteps    | 340500    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.148   |
| time/                 |          |
|    fps                | 981      |
|    iterations         | 69300    |
|    time_elapsed       | 353      |
|    total_timesteps    | 346500   |
| train/                |          |
|    entropy_loss       | -1.8e-05 |
|    explained_variance | -44      |
|    learning_rate      | 0.0007   |
|    n_updates          | 69299    |
|    policy_loss        | -1.4e-09 |
|    value_loss         | 2.68e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.148   |
| time/                 |          |
|    fps                | 981      |
|    iterations         | 69400    |
|    time_elapsed       | 353      |
|    total_timesteps    | 347000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.144   |
| time/                 |          |
|    fps                | 981      |
|    iterations         | 70600    |
|    time_elapsed       | 359      |
|    total_timesteps    | 353000   |
| train/                |          |
|    entropy_loss       | -1.3e-05 |
|    explained_variance | -22.7    |
|    learning_rate      | 0.0007   |
|    n_updates          | 70599    |
|    policy_loss        | 1.79e-10 |
|    value_loss         | 5e-08    |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.144    |
| time/                 |           |
|    fps                | 981       |
|    iterations         | 70700     |
|    time_elapsed       | 360       |
|    total_timesteps    | 353500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.187    |
| time/                 |           |
|    fps                | 981       |
|    iterations         | 71900     |
|    time_elapsed       | 366       |
|    total_timesteps    | 359500    |
| train/                |           |
|    entropy_loss       | -4.47e-05 |
|    explained_variance | 0.0848    |
|    learning_rate      | 0.0007    |
|    n_updates          | 71899     |
|    policy_loss        | 8.15e-10  |
|    value_loss         | 1.72e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.187    |
| time/                 |           |
|    fps                | 981       |
|    iterations         | 72000     |
|    time_elapsed       | 366       |
|    total_timesteps    | 360000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.183    |
| time/                 |           |
|    fps                | 981       |
|    iterations         | 73200     |
|    time_elapsed       | 372       |
|    total_timesteps    | 366000    |
| train/                |           |
|    entropy_loss       | -8.53e-05 |
|    explained_variance | -0.107    |
|    learning_rate      | 0.0007    |
|    n_updates          | 73199     |
|    policy_loss        | 2.01e-09  |
|    value_loss         | 1.61e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.183    |
| time/                 |           |
|    fps                | 981       |
|    iterations         | 73300     |
|    time_elapsed       | 373       |
|    total_timesteps    | 366500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.183    |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 74500     |
|    time_elapsed       | 379       |
|    total_timesteps    | 372500    |
| train/                |           |
|    entropy_loss       | -9.21e-05 |
|    explained_variance | -27.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 74499     |
|    policy_loss        | -4.73e-12 |
|    value_loss         | 8.6e-13   |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.179   |
| time/                 |          |
|    fps                | 982      |
|    iterations         | 74600    |
|    time_elapsed       | 379      |
|    total_timesteps    | 373000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.179    |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 75800     |
|    time_elapsed       | 385       |
|    total_timesteps    | 379000    |
| train/                |           |
|    entropy_loss       | -0.000152 |
|    explained_variance | -96.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 75799     |
|    policy_loss        | -7.35e-10 |
|    value_loss         | 9.26e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.179    |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 75900     |
|    time_elapsed       | 386       |
|    total_timesteps    | 379500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.202    |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 77100     |
|    time_elapsed       | 392       |
|    total_timesteps    | 385500    |
| train/                |           |
|    entropy_loss       | -2.68e-05 |
|    explained_variance | -48       |
|    learning_rate      | 0.0007    |
|    n_updates          | 77099     |
|    policy_loss        | -2.61e-10 |
|    value_loss         | 1.07e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.202   |
| time/                 |          |
|    fps                | 982      |
|    iterations         | 77200    |
|    time_elapsed       | 392      |
|    total_timesteps    | 386000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.225    |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 78400     |
|    time_elapsed       | 398       |
|    total_timesteps    | 392000    |
| train/                |           |
|    entropy_loss       | -2.14e-05 |
|    explained_variance | -44.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 78399     |
|    policy_loss        | -8.93e-11 |
|    value_loss         | 8.18e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.225    |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 78500     |
|    time_elapsed       | 399       |
|    total_timesteps    | 392500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.22     |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 79700     |
|    time_elapsed       | 405       |
|    total_timesteps    | 398500    |
| train/                |           |
|    entropy_loss       | -0.000107 |
|    explained_variance | -0.0468   |
|    learning_rate      | 0.0007    |
|    n_updates          | 79699     |
|    policy_loss        | 4.59e-09  |
|    value_loss         | 3.53e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.22    |
| time/                 |          |
|    fps                | 982      |
|    iterations         | 79800    |
|    time_elapsed       | 406      |
|    total_timesteps    | 399000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.243   |
| time/                 |          |
|    fps                | 982      |
|    iterations         | 81000    |
|    time_elapsed       | 412      |
|    total_timesteps    | 405000   |
| train/                |          |
|    entropy_loss       | -0.00108 |
|    explained_variance | -1.94    |
|    learning_rate      | 0.0007   |
|    n_updates          | 80999    |
|    policy_loss        | 1.39e-09 |
|    value_loss         | 4.86e-10 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.243    |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 81100     |
|    time_elapsed       | 412       |
|    total_timesteps    | 405500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.243    |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 82300     |
|    time_elapsed       | 418       |
|    total_timesteps    | 411500    |
| train/                |           |
|    entropy_loss       | -0.00034  |
|    explained_variance | -266      |
|    learning_rate      | 0.0007    |
|    n_updates          | 82299     |
|    policy_loss        | -4.86e-10 |
|    value_loss         | 5.18e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.243    |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 82400     |
|    time_elapsed       | 419       |
|    total_timesteps    | 412000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.238    |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 83600     |
|    time_elapsed       | 425       |
|    total_timesteps    | 418000    |
| train/                |           |
|    entropy_loss       | -0.0815   |
|    explained_variance | -44.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 83599     |
|    policy_loss        | -0.000154 |
|    value_loss         | 2.09e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.238    |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 83700     |
|    time_elapsed       | 425       |
|    total_timesteps    | 418500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.234    |
| time/                 |           |
|    fps                | 983       |
|    iterations         | 84900     |
|    time_elapsed       | 431       |
|    total_timesteps    | 424500    |
| train/                |           |
|    entropy_loss       | -2.46e-05 |
|    explained_variance | -11.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 84899     |
|    policy_loss        | 1.2e-09   |
|    value_loss         | 7.91e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.234    |
| time/                 |           |
|    fps                | 983       |
|    iterations         | 85000     |
|    time_elapsed       | 432       |
|    total_timesteps    | 425000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.23     |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 86200     |
|    time_elapsed       | 438       |
|    total_timesteps    | 431000    |
| train/                |           |
|    entropy_loss       | -6.31e-06 |
|    explained_variance | -7.69     |
|    learning_rate      | 0.0007    |
|    n_updates          | 86199     |
|    policy_loss        | -2.99e-10 |
|    value_loss         | 6.9e-06   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.23     |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 86300     |
|    time_elapsed       | 439       |
|    total_timesteps    | 431500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.23     |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 87500     |
|    time_elapsed       | 445       |
|    total_timesteps    | 437500    |
| train/                |           |
|    entropy_loss       | -6.5e-06  |
|    explained_variance | -0.00826  |
|    learning_rate      | 0.0007    |
|    n_updates          | 87499     |
|    policy_loss        | -9.94e-11 |
|    value_loss         | 2.21e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.23     |
| time/                 |           |
|    fps                | 982       |
|    iterations         | 87600     |
|    time_elapsed       | 445       |
|    total_timesteps    | 438000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.23     |
| time/                 |           |
|    fps                | 983       |
|    iterations         | 88800     |
|    time_elapsed       | 451       |
|    total_timesteps    | 444000    |
| train/                |           |
|    entropy_loss       | -0.000575 |
|    explained_variance | -19.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 88799     |
|    policy_loss        | -6.02e-11 |
|    value_loss         | 3.53e-12  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.226    |
| time/                 |           |
|    fps                | 983       |
|    iterations         | 88900     |
|    time_elapsed       | 452       |
|    total_timesteps    | 444500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.226    |
| time/                 |           |
|    fps                | 983       |
|    iterations         | 90100     |
|    time_elapsed       | 458       |
|    total_timesteps    | 450500    |
| train/                |           |
|    entropy_loss       | -0.000518 |
|    explained_variance | -380      |
|    learning_rate      | 0.0007    |
|    n_updates          | 90099     |
|    policy_loss        | -8.7e-09  |
|    value_loss         | 3.53e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.226    |
| time/                 |           |
|    fps                | 983       |
|    iterations         | 90200     |
|    time_elapsed       | 458       |
|    total_timesteps    | 451000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.239    |
| time/                 |           |
|    fps                | 983       |
|    iterations         | 91400     |
|    time_elapsed       | 464       |
|    total_timesteps    | 457000    |
| train/                |           |
|    entropy_loss       | -4.09e-06 |
|    explained_variance | -36.4     |
|    learning_rate      | 0.0007    |
|    n_updates          | 91399     |
|    policy_loss        | -0        |
|    value_loss         | 7.14e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.239    |
| time/                 |           |
|    fps                | 983       |
|    iterations         | 91500     |
|    time_elapsed       | 465       |
|    total_timesteps    | 457500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.234    |
| time/                 |           |
|    fps                | 983       |
|    iterations         | 92700     |
|    time_elapsed       | 471       |
|    total_timesteps    | 463500    |
| train/                |           |
|    entropy_loss       | -3.26e-06 |
|    explained_variance | -40.5     |
|    learning_rate      | 0.0007    |
|    n_updates          | 92699     |
|    policy_loss        | -0        |
|    value_loss         | 1.81e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.234    |
| time/                 |           |
|    fps                | 983       |
|    iterations         | 92800     |
|    time_elapsed       | 471       |
|    total_timesteps    | 464000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.23     |
| time/                 |           |
|    fps                | 983       |
|    iterations         | 94000     |
|    time_elapsed       | 477       |
|    total_timesteps    | 470000    |
| train/                |           |
|    entropy_loss       | -3.96e-06 |
|    explained_variance | -1.33     |
|    learning_rate      | 0.0007    |
|    n_updates          | 93999     |
|    policy_loss        | -0        |
|    value_loss         | 4.81e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.23     |
| time/                 |           |
|    fps                | 984       |
|    iterations         | 94100     |
|    time_elapsed       | 478       |
|    total_timesteps    | 470500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.227    |
| time/                 |           |
|    fps                | 984       |
|    iterations         | 95300     |
|    time_elapsed       | 484       |
|    total_timesteps    | 476500    |
| train/                |           |
|    entropy_loss       | -3.67e-06 |
|    explained_variance | -3.11     |
|    learning_rate      | 0.0007    |
|    n_updates          | 95299     |
|    policy_loss        | -0        |
|    value_loss         | 1e-07     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.227    |
| time/                 |           |
|    fps                | 984       |
|    iterations         | 95400     |
|    time_elapsed       | 484       |
|    total_timesteps    | 477000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.227    |
| time/                 |           |
|    fps                | 984       |
|    iterations         | 96600     |
|    time_elapsed       | 490       |
|    total_timesteps    | 483000    |
| train/                |           |
|    entropy_loss       | -0.00121  |
|    explained_variance | -0.00988  |
|    learning_rate      | 0.0007    |
|    n_updates          | 96599     |
|    policy_loss        | -9.94e-09 |
|    value_loss         | 6.86e-09  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.227   |
| time/                 |          |
|    fps                | 984      |
|    iterations         | 96700    |
|    time_elapsed       | 491      |
|    total_timesteps    | 483500   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.223   |
| time/                 |          |
|    fps                | 984      |
|    iterations         | 97900    |
|    time_elapsed       | 497      |
|    total_timesteps    | 489500   |
| train/                |          |
|    entropy_loss       | -0.00104 |
|    explained_variance | -44.4    |
|    learning_rate      | 0.0007   |
|    n_updates          | 97899    |
|    policy_loss        | -1.5e-09 |
|    value_loss         | 2.81e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.223    |
| time/                 |           |
|    fps                | 984       |
|    iterations         | 98000     |
|    time_elapsed       | 497       |
|    total_timesteps    | 490000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.219   |
| time/                 |          |
|    fps                | 984      |
|    iterations         | 99200    |
|    time_elapsed       | 503      |
|    total_timesteps    | 496000   |
| train/                |          |
|    entropy_loss       | -4.3e-06 |
|    explained_variance | -21.7    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99199    |
|    policy_loss        | -0       |
|    value_loss         | 8.63e-08 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.219    |
| time/                 |           |
|    fps                | 984       |
|    iterations         | 99300     |
|    time_elapsed       | 504       |
|    total_timesteps    | 496500    |
| train/                |    

Step: 329, Action: 2, Reward: 0
Step: 330, Action: 2, Reward: 0
Step: 331, Action: 2, Reward: 0
Step: 332, Action: 2, Reward: 0
Step: 333, Action: 2, Reward: 0
Step: 334, Action: 2, Reward: 0
Step: 335, Action: 2, Reward: 0
Step: 336, Action: 2, Reward: 0
Step: 337, Action: 2, Reward: 0
Step: 338, Action: 2, Reward: 0
Step: 339, Action: 2, Reward: 0
Step: 340, Action: 2, Reward: 0
Step: 341, Action: 2, Reward: 0
Step: 342, Action: 2, Reward: 0
Step: 343, Action: 2, Reward: 0
Step: 344, Action: 2, Reward: 0
Step: 345, Action: 2, Reward: 0
Step: 346, Action: 2, Reward: 0
Step: 347, Action: 2, Reward: 0
Step: 348, Action: 2, Reward: 0
Step: 349, Action: 2, Reward: 0
Step: 350, Action: 2, Reward: 0
Step: 351, Action: 2, Reward: 0
Step: 352, Action: 2, Reward: 0
Step: 353, Action: 2, Reward: 0
Step: 354, Action: 2, Reward: 0
Step: 355, Action: 2, Reward: 0
Step: 356, Action: 2, Reward: 0
Step: 357, Action: 2, Reward: 0
Step: 358, Action: 2, Reward: 0
Step: 359, Action: 2, Reward: 0
Step: 36

Step: 735, Action: 2, Reward: 0
Step: 736, Action: 2, Reward: 0
Step: 737, Action: 2, Reward: 0
Step: 738, Action: 2, Reward: 0
Step: 739, Action: 2, Reward: 0
Step: 740, Action: 2, Reward: 0
Step: 741, Action: 2, Reward: 0
Step: 742, Action: 2, Reward: 0
Step: 743, Action: 2, Reward: 0
Step: 744, Action: 2, Reward: 0
Step: 745, Action: 2, Reward: 0
Step: 746, Action: 2, Reward: 0
Step: 747, Action: 2, Reward: 0
Step: 748, Action: 2, Reward: 0
Step: 749, Action: 2, Reward: 0
Step: 750, Action: 2, Reward: 0
Step: 751, Action: 2, Reward: 0
Step: 752, Action: 2, Reward: 0
Step: 753, Action: 2, Reward: 0
Step: 754, Action: 2, Reward: 0
Step: 755, Action: 2, Reward: 0
Step: 756, Action: 2, Reward: 0
Step: 757, Action: 2, Reward: 0
Step: 758, Action: 2, Reward: 0
Step: 759, Action: 2, Reward: 0
Step: 760, Action: 2, Reward: 0
Step: 761, Action: 2, Reward: 0
Step: 762, Action: 2, Reward: 0
Step: 763, Action: 2, Reward: 0
Step: 764, Action: 2, Reward: 0
Step: 765, Action: 2, Reward: 0
Step: 76

Step: 1168, Action: 2, Reward: 0
Step: 1169, Action: 2, Reward: 0
Step: 1170, Action: 2, Reward: 0
Step: 1171, Action: 2, Reward: 0
Step: 1172, Action: 2, Reward: 0
Step: 1173, Action: 2, Reward: 0
Step: 1174, Action: 2, Reward: 0
Step: 1175, Action: 2, Reward: 0
Step: 1176, Action: 2, Reward: 0
Step: 1177, Action: 2, Reward: 0
Step: 1178, Action: 2, Reward: 0
Step: 1179, Action: 2, Reward: 0
Step: 1180, Action: 2, Reward: 0
Step: 1181, Action: 2, Reward: 0
Step: 1182, Action: 2, Reward: 0
Step: 1183, Action: 2, Reward: 0
Step: 1184, Action: 2, Reward: 0
Step: 1185, Action: 2, Reward: 0
Step: 1186, Action: 2, Reward: 0
Step: 1187, Action: 2, Reward: 0
Step: 1188, Action: 2, Reward: 0
Step: 1189, Action: 2, Reward: 0
Step: 1190, Action: 2, Reward: 0
Step: 1191, Action: 2, Reward: 0
Step: 1192, Action: 2, Reward: 0
Step: 1193, Action: 2, Reward: 0
Step: 1194, Action: 2, Reward: 0
Step: 1195, Action: 2, Reward: 0
Step: 1196, Action: 2, Reward: 0
Step: 1197, Action: 2, Reward: 0
Step: 1198

Step: 1601, Action: 2, Reward: 0
Step: 1602, Action: 2, Reward: 0
Step: 1603, Action: 2, Reward: 0
Step: 1604, Action: 2, Reward: 0
Step: 1605, Action: 2, Reward: 0
Step: 1606, Action: 2, Reward: 0
Step: 1607, Action: 2, Reward: 0
Step: 1608, Action: 2, Reward: 0
Step: 1609, Action: 2, Reward: 0
Step: 1610, Action: 2, Reward: 0
Step: 1611, Action: 2, Reward: 0
Step: 1612, Action: 2, Reward: 0
Step: 1613, Action: 2, Reward: 0
Step: 1614, Action: 2, Reward: 0
Step: 1615, Action: 2, Reward: 0
Step: 1616, Action: 2, Reward: 0
Step: 1617, Action: 2, Reward: 0
Step: 1618, Action: 2, Reward: 0
Step: 1619, Action: 2, Reward: 0
Step: 1620, Action: 2, Reward: 0
Step: 1621, Action: 2, Reward: 0
Step: 1622, Action: 2, Reward: 0
Step: 1623, Action: 2, Reward: 0
Step: 1624, Action: 2, Reward: 0
Step: 1625, Action: 2, Reward: 0
Step: 1626, Action: 2, Reward: 0
Step: 1627, Action: 2, Reward: 0
Step: 1628, Action: 2, Reward: 0
Step: 1629, Action: 2, Reward: 0
Step: 1630, Action: 2, Reward: 0
Step: 1631

Step: 2036, Action: 2, Reward: 0
Step: 2037, Action: 2, Reward: 0
Step: 2038, Action: 2, Reward: 0
Step: 2039, Action: 2, Reward: 0
Step: 2040, Action: 2, Reward: 0
Step: 2041, Action: 2, Reward: 0
Step: 2042, Action: 2, Reward: 0
Step: 2043, Action: 2, Reward: 0
Step: 2044, Action: 2, Reward: 0
Step: 2045, Action: 2, Reward: 0
Step: 2046, Action: 2, Reward: 0
Step: 2047, Action: 2, Reward: 0
Step: 2048, Action: 2, Reward: 0
Step: 2049, Action: 2, Reward: 0
Step: 2050, Action: 2, Reward: 0
Step: 2051, Action: 2, Reward: 0
Step: 2052, Action: 2, Reward: 0
Step: 2053, Action: 2, Reward: 0
Step: 2054, Action: 2, Reward: 0
Step: 2055, Action: 2, Reward: 0
Step: 2056, Action: 2, Reward: 0
Step: 2057, Action: 2, Reward: 0
Step: 2058, Action: 2, Reward: 0
Step: 2059, Action: 2, Reward: 0
Step: 2060, Action: 2, Reward: 0
Step: 2061, Action: 2, Reward: 0
Step: 2062, Action: 2, Reward: 0
Step: 2063, Action: 2, Reward: 0
Step: 2064, Action: 2, Reward: 0
Step: 2065, Action: 2, Reward: 0
Step: 2066

Step: 2469, Action: 2, Reward: 0
Step: 2470, Action: 2, Reward: 0
Step: 2471, Action: 2, Reward: 0
Step: 2472, Action: 2, Reward: 0
Step: 2473, Action: 2, Reward: 0
Step: 2474, Action: 2, Reward: 0
Step: 2475, Action: 2, Reward: 0
Step: 2476, Action: 2, Reward: 0
Step: 2477, Action: 2, Reward: 0
Step: 2478, Action: 2, Reward: 0
Step: 2479, Action: 2, Reward: 0
Step: 2480, Action: 2, Reward: 0
Step: 2481, Action: 2, Reward: 0
Step: 2482, Action: 2, Reward: 0
Step: 2483, Action: 2, Reward: 0
Step: 2484, Action: 2, Reward: 0
Step: 2485, Action: 2, Reward: 0
Step: 2486, Action: 2, Reward: 0
Step: 2487, Action: 2, Reward: 0
Step: 2488, Action: 2, Reward: 0
Step: 2489, Action: 2, Reward: 0
Step: 2490, Action: 2, Reward: 0
Step: 2491, Action: 2, Reward: 0
Step: 2492, Action: 2, Reward: 0
Step: 2493, Action: 2, Reward: 0
Step: 2494, Action: 2, Reward: 0
Step: 2495, Action: 2, Reward: 0
Step: 2496, Action: 2, Reward: 0
Step: 2497, Action: 2, Reward: 0
Step: 2498, Action: 2, Reward: 0
Step: 2499

Step: 2903, Action: 2, Reward: 0
Step: 2904, Action: 2, Reward: 0
Step: 2905, Action: 2, Reward: 0
Step: 2906, Action: 2, Reward: 0
Step: 2907, Action: 2, Reward: 0
Step: 2908, Action: 2, Reward: 0
Step: 2909, Action: 2, Reward: 0
Step: 2910, Action: 2, Reward: 0
Step: 2911, Action: 2, Reward: 0
Step: 2912, Action: 2, Reward: 0
Step: 2913, Action: 2, Reward: 0
Step: 2914, Action: 2, Reward: 0
Step: 2915, Action: 2, Reward: 0
Step: 2916, Action: 2, Reward: 0
Step: 2917, Action: 2, Reward: 0
Step: 2918, Action: 2, Reward: 0
Step: 2919, Action: 2, Reward: 0
Step: 2920, Action: 2, Reward: 0
Step: 2921, Action: 2, Reward: 0
Step: 2922, Action: 2, Reward: 0
Step: 2923, Action: 2, Reward: 0
Step: 2924, Action: 2, Reward: 0
Step: 2925, Action: 2, Reward: 0
Step: 2926, Action: 2, Reward: 0
Step: 2927, Action: 2, Reward: 0
Step: 2928, Action: 2, Reward: 0
Step: 2929, Action: 2, Reward: 0
Step: 2930, Action: 2, Reward: 0
Step: 2931, Action: 2, Reward: 0
Step: 2932, Action: 2, Reward: 0
Step: 2933

Step: 3336, Action: 2, Reward: 0
Step: 3337, Action: 2, Reward: 0
Step: 3338, Action: 2, Reward: 0
Step: 3339, Action: 2, Reward: 0
Step: 3340, Action: 2, Reward: 0
Step: 3341, Action: 2, Reward: 0
Step: 3342, Action: 2, Reward: 0
Step: 3343, Action: 2, Reward: 0
Step: 3344, Action: 2, Reward: 0
Step: 3345, Action: 2, Reward: 0
Step: 3346, Action: 2, Reward: 0
Step: 3347, Action: 2, Reward: 0
Step: 3348, Action: 2, Reward: 0
Step: 3349, Action: 2, Reward: 0
Step: 3350, Action: 2, Reward: 0
Step: 3351, Action: 2, Reward: 0
Step: 3352, Action: 2, Reward: 0
Step: 3353, Action: 2, Reward: 0
Step: 3354, Action: 2, Reward: 0
Step: 3355, Action: 2, Reward: 0
Step: 3356, Action: 2, Reward: 0
Step: 3357, Action: 2, Reward: 0
Step: 3358, Action: 2, Reward: 0
Step: 3359, Action: 2, Reward: 0
Step: 3360, Action: 2, Reward: 0
Step: 3361, Action: 2, Reward: 0
Step: 3362, Action: 2, Reward: 0
Step: 3363, Action: 2, Reward: 0
Step: 3364, Action: 2, Reward: 0
Step: 3365, Action: 2, Reward: 0
Step: 3366

Step: 3770, Action: 2, Reward: 0
Step: 3771, Action: 2, Reward: 0
Step: 3772, Action: 2, Reward: 0
Step: 3773, Action: 2, Reward: 0
Step: 3774, Action: 2, Reward: 0
Step: 3775, Action: 2, Reward: 0
Step: 3776, Action: 2, Reward: 0
Step: 3777, Action: 2, Reward: 0
Step: 3778, Action: 2, Reward: 0
Step: 3779, Action: 2, Reward: 0
Step: 3780, Action: 2, Reward: 0
Step: 3781, Action: 2, Reward: 0
Step: 3782, Action: 2, Reward: 0
Step: 3783, Action: 2, Reward: 0
Step: 3784, Action: 2, Reward: 0
Step: 3785, Action: 2, Reward: 0
Step: 3786, Action: 2, Reward: 0
Step: 3787, Action: 2, Reward: 0
Step: 3788, Action: 2, Reward: 0
Step: 3789, Action: 2, Reward: 0
Step: 3790, Action: 2, Reward: 0
Step: 3791, Action: 2, Reward: 0
Step: 3792, Action: 2, Reward: 0
Step: 3793, Action: 2, Reward: 0
Step: 3794, Action: 2, Reward: 0
Step: 3795, Action: 2, Reward: 0
Step: 3796, Action: 2, Reward: 0
Step: 3797, Action: 2, Reward: 0
Step: 3798, Action: 2, Reward: 0
Step: 3799, Action: 2, Reward: 0
Step: 3800

Step: 4204, Action: 2, Reward: 0
Step: 4205, Action: 2, Reward: 0
Step: 4206, Action: 2, Reward: 0
Step: 4207, Action: 2, Reward: 0
Step: 4208, Action: 2, Reward: 0
Step: 4209, Action: 2, Reward: 0
Step: 4210, Action: 2, Reward: 0
Step: 4211, Action: 2, Reward: 0
Step: 4212, Action: 2, Reward: 0
Step: 4213, Action: 2, Reward: 0
Step: 4214, Action: 2, Reward: 0
Step: 4215, Action: 2, Reward: 0
Step: 4216, Action: 2, Reward: 0
Step: 4217, Action: 2, Reward: 0
Step: 4218, Action: 2, Reward: 0
Step: 4219, Action: 2, Reward: 0
Step: 4220, Action: 2, Reward: 0
Step: 4221, Action: 2, Reward: 0
Step: 4222, Action: 2, Reward: 0
Step: 4223, Action: 2, Reward: 0
Step: 4224, Action: 2, Reward: 0
Step: 4225, Action: 2, Reward: 0
Step: 4226, Action: 2, Reward: 0
Step: 4227, Action: 2, Reward: 0
Step: 4228, Action: 2, Reward: 0
Step: 4229, Action: 2, Reward: 0
Step: 4230, Action: 2, Reward: 0
Step: 4231, Action: 2, Reward: 0
Step: 4232, Action: 2, Reward: 0
Step: 4233, Action: 2, Reward: 0
Step: 4234

Step: 4636, Action: 2, Reward: 0
Step: 4637, Action: 2, Reward: 0
Step: 4638, Action: 2, Reward: 0
Step: 4639, Action: 2, Reward: 0
Step: 4640, Action: 2, Reward: 0
Step: 4641, Action: 2, Reward: 0
Step: 4642, Action: 2, Reward: 0
Step: 4643, Action: 2, Reward: 0
Step: 4644, Action: 2, Reward: 0
Step: 4645, Action: 2, Reward: 0
Step: 4646, Action: 2, Reward: 0
Step: 4647, Action: 2, Reward: 0
Step: 4648, Action: 2, Reward: 0
Step: 4649, Action: 2, Reward: 0
Step: 4650, Action: 2, Reward: 0
Step: 4651, Action: 2, Reward: 0
Step: 4652, Action: 2, Reward: 0
Step: 4653, Action: 2, Reward: 0
Step: 4654, Action: 2, Reward: 0
Step: 4655, Action: 2, Reward: 0
Step: 4656, Action: 2, Reward: 0
Step: 4657, Action: 2, Reward: 0
Step: 4658, Action: 2, Reward: 0
Step: 4659, Action: 2, Reward: 0
Step: 4660, Action: 2, Reward: 0
Step: 4661, Action: 2, Reward: 0
Step: 4662, Action: 2, Reward: 0
Step: 4663, Action: 2, Reward: 0
Step: 4664, Action: 2, Reward: 0
Step: 4665, Action: 2, Reward: 0
Step: 4666

Step: 5068, Action: 0, Reward: 0
Step: 5069, Action: 0, Reward: 0
Step: 5070, Action: 0, Reward: 0
Step: 5071, Action: 0, Reward: 0
Step: 5072, Action: 0, Reward: 0
Step: 5073, Action: 0, Reward: 0
Step: 5074, Action: 0, Reward: 0
Step: 5075, Action: 0, Reward: 0
Step: 5076, Action: 0, Reward: 0
Step: 5077, Action: 0, Reward: 0
Step: 5078, Action: 0, Reward: 0
Step: 5079, Action: 0, Reward: 0
Step: 5080, Action: 0, Reward: 0
Step: 5081, Action: 0, Reward: 0
Step: 5082, Action: 0, Reward: 0
Step: 5083, Action: 0, Reward: 0
Step: 5084, Action: 0, Reward: 0
Step: 5085, Action: 0, Reward: 0
Step: 5086, Action: 0, Reward: 0
Step: 5087, Action: 0, Reward: 0
Step: 5088, Action: 2, Reward: 0
Step: 5089, Action: 2, Reward: 0
Step: 5090, Action: 2, Reward: 0
Step: 5091, Action: 2, Reward: 0
Step: 5092, Action: 2, Reward: 0
Step: 5093, Action: 2, Reward: 0
Step: 5094, Action: 2, Reward: 0
Step: 5095, Action: 2, Reward: 0
Step: 5096, Action: 0, Reward: 0
Step: 5097, Action: 0, Reward: 0
Step: 5098

Step: 5502, Action: 0, Reward: 0
Step: 5503, Action: 0, Reward: 0
Step: 5504, Action: 0, Reward: 0
Step: 5505, Action: 0, Reward: 0
Step: 5506, Action: 0, Reward: 0
Step: 5507, Action: 0, Reward: 0
Step: 5508, Action: 0, Reward: 0
Step: 5509, Action: 0, Reward: 0
Step: 5510, Action: 2, Reward: 0
Step: 5511, Action: 0, Reward: 0
Step: 5512, Action: 0, Reward: 0
Step: 5513, Action: 2, Reward: 0
Step: 5514, Action: 2, Reward: 0
Step: 5515, Action: 0, Reward: 0
Step: 5516, Action: 0, Reward: 0
Step: 5517, Action: 0, Reward: 0
Step: 5518, Action: 0, Reward: 0
Step: 5519, Action: 0, Reward: 0
Step: 5520, Action: 0, Reward: 0
Step: 5521, Action: 0, Reward: 0
Step: 5522, Action: 0, Reward: 0
Step: 5523, Action: 0, Reward: 0
Step: 5524, Action: 2, Reward: 0
Step: 5525, Action: 2, Reward: 0
Step: 5526, Action: 2, Reward: 0
Step: 5527, Action: 2, Reward: 0
Step: 5528, Action: 2, Reward: 0
Step: 5529, Action: 2, Reward: 0
Step: 5530, Action: 2, Reward: 0
Step: 5531, Action: 0, Reward: 0
Step: 5532

Step: 5938, Action: 0, Reward: 0
Step: 5939, Action: 0, Reward: 0
Step: 5940, Action: 0, Reward: 0
Step: 5941, Action: 0, Reward: 0
Step: 5942, Action: 0, Reward: 0
Step: 5943, Action: 0, Reward: 0
Step: 5944, Action: 0, Reward: 0
Step: 5945, Action: 0, Reward: 0
Step: 5946, Action: 0, Reward: 0
Step: 5947, Action: 0, Reward: 0
Step: 5948, Action: 0, Reward: 0
Step: 5949, Action: 0, Reward: 0
Step: 5950, Action: 0, Reward: 0
Step: 5951, Action: 0, Reward: 0
Step: 5952, Action: 0, Reward: 0
Step: 5953, Action: 0, Reward: 0
Step: 5954, Action: 0, Reward: 0
Step: 5955, Action: 0, Reward: 0
Step: 5956, Action: 0, Reward: 0
Step: 5957, Action: 0, Reward: 0
Step: 5958, Action: 0, Reward: 0
Step: 5959, Action: 0, Reward: 0
Step: 5960, Action: 0, Reward: 0
Step: 5961, Action: 0, Reward: 0
Step: 5962, Action: 0, Reward: 0
Step: 5963, Action: 0, Reward: 0
Step: 5964, Action: 0, Reward: 0
Step: 5965, Action: 0, Reward: 0
Step: 5966, Action: 0, Reward: 0
Step: 5967, Action: 0, Reward: 0
Step: 5968

Step: 6373, Action: 0, Reward: 0
Step: 6374, Action: 0, Reward: 0
Step: 6375, Action: 0, Reward: 0
Step: 6376, Action: 0, Reward: 0
Step: 6377, Action: 0, Reward: 0
Step: 6378, Action: 0, Reward: 0
Step: 6379, Action: 0, Reward: 0
Step: 6380, Action: 0, Reward: 0
Step: 6381, Action: 0, Reward: 0
Step: 6382, Action: 0, Reward: 0
Step: 6383, Action: 0, Reward: 0
Step: 6384, Action: 0, Reward: 0
Step: 6385, Action: 0, Reward: 0
Step: 6386, Action: 0, Reward: 0
Step: 6387, Action: 0, Reward: 0
Step: 6388, Action: 0, Reward: 0
Step: 6389, Action: 0, Reward: 0
Step: 6390, Action: 0, Reward: 0
Step: 6391, Action: 0, Reward: 0
Step: 6392, Action: 0, Reward: 0
Step: 6393, Action: 0, Reward: 0
Step: 6394, Action: 0, Reward: 0
Step: 6395, Action: 0, Reward: 0
Step: 6396, Action: 0, Reward: 0
Step: 6397, Action: 0, Reward: 0
Step: 6398, Action: 0, Reward: 0
Step: 6399, Action: 0, Reward: 0
Step: 6400, Action: 0, Reward: 0
Step: 6401, Action: 0, Reward: 0
Step: 6402, Action: 0, Reward: 0
Step: 6403

Step: 6808, Action: 0, Reward: 0
Step: 6809, Action: 0, Reward: 0
Step: 6810, Action: 0, Reward: 0
Step: 6811, Action: 0, Reward: 0
Step: 6812, Action: 0, Reward: 0
Step: 6813, Action: 0, Reward: 0
Step: 6814, Action: 0, Reward: 0
Step: 6815, Action: 0, Reward: 0
Step: 6816, Action: 0, Reward: 0
Step: 6817, Action: 0, Reward: 0
Step: 6818, Action: 0, Reward: 0
Step: 6819, Action: 0, Reward: 0
Step: 6820, Action: 0, Reward: 0
Step: 6821, Action: 0, Reward: 0
Step: 6822, Action: 0, Reward: 0
Step: 6823, Action: 0, Reward: 0
Step: 6824, Action: 0, Reward: 0
Step: 6825, Action: 0, Reward: 0
Step: 6826, Action: 0, Reward: 0
Step: 6827, Action: 0, Reward: 0
Step: 6828, Action: 0, Reward: 0
Step: 6829, Action: 0, Reward: 0
Step: 6830, Action: 0, Reward: 0
Step: 6831, Action: 0, Reward: 0
Step: 6832, Action: 0, Reward: 0
Step: 6833, Action: 0, Reward: 0
Step: 6834, Action: 0, Reward: 0
Step: 6835, Action: 0, Reward: 0
Step: 6836, Action: 0, Reward: 0
Step: 6837, Action: 0, Reward: 0
Step: 6838

Step: 7244, Action: 0, Reward: 0
Step: 7245, Action: 0, Reward: 0
Step: 7246, Action: 0, Reward: 0
Step: 7247, Action: 0, Reward: 0
Step: 7248, Action: 0, Reward: 0
Step: 7249, Action: 0, Reward: 0
Step: 7250, Action: 0, Reward: 0
Step: 7251, Action: 0, Reward: 0
Step: 7252, Action: 0, Reward: 0
Step: 7253, Action: 0, Reward: 0
Step: 7254, Action: 0, Reward: 0
Step: 7255, Action: 0, Reward: 0
Step: 7256, Action: 0, Reward: 0
Step: 7257, Action: 0, Reward: 0
Step: 7258, Action: 0, Reward: 0
Step: 7259, Action: 0, Reward: 0
Step: 7260, Action: 0, Reward: 0
Step: 7261, Action: 0, Reward: 0
Step: 7262, Action: 0, Reward: 0
Step: 7263, Action: 0, Reward: 0
Step: 7264, Action: 0, Reward: 0
Step: 7265, Action: 0, Reward: 0
Step: 7266, Action: 0, Reward: 0
Step: 7267, Action: 0, Reward: 0
Step: 7268, Action: 0, Reward: 0
Step: 7269, Action: 0, Reward: 0
Step: 7270, Action: 0, Reward: 0
Step: 7271, Action: 0, Reward: 0
Step: 7272, Action: 0, Reward: 0
Step: 7273, Action: 0, Reward: 0
Step: 7274

Step: 7678, Action: 0, Reward: 0
Step: 7679, Action: 0, Reward: 0
Step: 7680, Action: 0, Reward: 0
Step: 7681, Action: 0, Reward: 0
Step: 7682, Action: 0, Reward: 0
Step: 7683, Action: 0, Reward: 0
Step: 7684, Action: 0, Reward: 0
Step: 7685, Action: 0, Reward: 0
Step: 7686, Action: 0, Reward: 0
Step: 7687, Action: 0, Reward: 0
Step: 7688, Action: 0, Reward: 0
Step: 7689, Action: 0, Reward: 0
Step: 7690, Action: 0, Reward: 0
Step: 7691, Action: 0, Reward: 0
Step: 7692, Action: 0, Reward: 0
Step: 7693, Action: 0, Reward: 0
Step: 7694, Action: 0, Reward: 0
Step: 7695, Action: 0, Reward: 0
Step: 7696, Action: 0, Reward: 0
Step: 7697, Action: 0, Reward: 0
Step: 7698, Action: 0, Reward: 0
Step: 7699, Action: 0, Reward: 0
Step: 7700, Action: 0, Reward: 0
Step: 7701, Action: 0, Reward: 0
Step: 7702, Action: 0, Reward: 0
Step: 7703, Action: 0, Reward: 0
Step: 7704, Action: 0, Reward: 0
Step: 7705, Action: 0, Reward: 0
Step: 7706, Action: 0, Reward: 0
Step: 7707, Action: 0, Reward: 0
Step: 7708

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Step: 166, Action: 0, Reward: 0
Step: 167, Action: 0, Reward: 0
Step: 168, Action: 0, Reward: 0
Step: 169, Action: 0, Reward: 0
Step: 170, Action: 0, Reward: 0
Step: 171, Action: 0, Reward: 0
Step: 172, Action: 0, Reward: 0
Step: 173, Action: 0, Reward: 0
Step: 174, Action: 0, Reward: 0
Step: 175, Action: 0, Reward: 0
Step: 176, Action: 0, Reward: 0
Step: 177, Action: 0, Reward: 0
Step: 178, Action: 0, Reward: 0
Step: 179, Action: 0, Reward: 0
Step: 180, Action: 0, Reward: 0
Step: 181, Action: 0, Reward: 0
Step: 182, Action: 0, Reward: 0
Step: 183, Action: 0, Reward: 0
Step: 184, Action: 0, Reward: 0
Step: 185, Action: 0, Reward: 0
Step: 186, Action: 0, Reward: 0
Step: 187, Action: 0, Reward: 0
Step: 188, Action: 0, Reward: 0
Step: 189, Action: 0, Reward: 0
Step: 190, Action: 0, Reward: 0
Step: 191, Action: 0, Reward: 0
Step: 192, Action: 0, Reward: 0
Step: 193, Action: 0, Reward: 0
Step: 194, Action: 0, Reward: 0
Step: 195, Action: 0, Reward: 0
Step: 196, Action: 0, Reward: 0
Step: 19

Step: 598, Action: 0, Reward: 0
Step: 599, Action: 0, Reward: 0
Step: 600, Action: 0, Reward: 0
Step: 601, Action: 0, Reward: 0
Step: 602, Action: 0, Reward: 0
Step: 603, Action: 0, Reward: 0
Step: 604, Action: 0, Reward: 0
Step: 605, Action: 0, Reward: 0
Step: 606, Action: 0, Reward: 0
Step: 607, Action: 0, Reward: 0
Step: 608, Action: 0, Reward: 0
Step: 609, Action: 0, Reward: 0
Step: 610, Action: 0, Reward: 0
Step: 611, Action: 0, Reward: 0
Step: 612, Action: 0, Reward: 0
Step: 613, Action: 0, Reward: 0
Step: 614, Action: 0, Reward: 0
Step: 615, Action: 0, Reward: 0
Step: 616, Action: 0, Reward: 0
Step: 617, Action: 0, Reward: 0
Step: 618, Action: 0, Reward: 0
Step: 619, Action: 0, Reward: 0
Step: 620, Action: 0, Reward: 0
Step: 621, Action: 0, Reward: 0
Step: 622, Action: 0, Reward: 0
Step: 623, Action: 0, Reward: 0
Step: 624, Action: 0, Reward: 0
Step: 625, Action: 0, Reward: 0
Step: 626, Action: 0, Reward: 0
Step: 627, Action: 0, Reward: 0
Step: 628, Action: 0, Reward: 0
Step: 62

Step: 1034, Action: 0, Reward: 0
Step: 1035, Action: 0, Reward: 0
Step: 1036, Action: 0, Reward: 0
Step: 1037, Action: 0, Reward: 0
Step: 1038, Action: 0, Reward: 0
Step: 1039, Action: 0, Reward: 0
Step: 1040, Action: 0, Reward: 0
Step: 1041, Action: 0, Reward: 0
Step: 1042, Action: 0, Reward: 0
Step: 1043, Action: 0, Reward: 0
Step: 1044, Action: 0, Reward: 0
Step: 1045, Action: 0, Reward: 0
Step: 1046, Action: 0, Reward: 0
Step: 1047, Action: 0, Reward: 0
Step: 1048, Action: 0, Reward: 0
Step: 1049, Action: 0, Reward: 0
Step: 1050, Action: 0, Reward: 0
Step: 1051, Action: 0, Reward: 0
Step: 1052, Action: 0, Reward: 0
Step: 1053, Action: 0, Reward: 0
Step: 1054, Action: 0, Reward: 0
Step: 1055, Action: 0, Reward: 0
Step: 1056, Action: 0, Reward: 0
Step: 1057, Action: 0, Reward: 0
Step: 1058, Action: 0, Reward: 0
Step: 1059, Action: 0, Reward: 0
Step: 1060, Action: 0, Reward: 0
Step: 1061, Action: 0, Reward: 0
Step: 1062, Action: 0, Reward: 0
Step: 1063, Action: 0, Reward: 0
Step: 1064

Step: 1467, Action: 0, Reward: 0
Step: 1468, Action: 0, Reward: 0
Step: 1469, Action: 0, Reward: 0
Step: 1470, Action: 0, Reward: 0
Step: 1471, Action: 0, Reward: 0
Step: 1472, Action: 0, Reward: 0
Step: 1473, Action: 0, Reward: 0
Step: 1474, Action: 0, Reward: 0
Step: 1475, Action: 0, Reward: 0
Step: 1476, Action: 0, Reward: 0
Step: 1477, Action: 0, Reward: 0
Step: 1478, Action: 0, Reward: 0
Step: 1479, Action: 0, Reward: 0
Step: 1480, Action: 0, Reward: 0
Step: 1481, Action: 0, Reward: 0
Step: 1482, Action: 0, Reward: 0
Step: 1483, Action: 0, Reward: 0
Step: 1484, Action: 0, Reward: 0
Step: 1485, Action: 0, Reward: 0
Step: 1486, Action: 0, Reward: 0
Step: 1487, Action: 0, Reward: 0
Step: 1488, Action: 0, Reward: 0
Step: 1489, Action: 0, Reward: 0
Step: 1490, Action: 0, Reward: 0
Step: 1491, Action: 0, Reward: 0
Step: 1492, Action: 0, Reward: 0
Step: 1493, Action: 0, Reward: 0
Step: 1494, Action: 0, Reward: 0
Step: 1495, Action: 0, Reward: 0
Step: 1496, Action: 0, Reward: 0
Step: 1497

Step: 1900, Action: 0, Reward: 0
Step: 1901, Action: 0, Reward: 0
Step: 1902, Action: 0, Reward: 0
Step: 1903, Action: 0, Reward: 0
Step: 1904, Action: 0, Reward: 0
Step: 1905, Action: 0, Reward: 0
Step: 1906, Action: 0, Reward: 0
Step: 1907, Action: 0, Reward: 0
Step: 1908, Action: 0, Reward: 0
Step: 1909, Action: 0, Reward: 0
Step: 1910, Action: 0, Reward: 0
Step: 1911, Action: 0, Reward: 0
Step: 1912, Action: 0, Reward: 0
Step: 1913, Action: 0, Reward: 0
Step: 1914, Action: 0, Reward: 0
Step: 1915, Action: 0, Reward: 0
Step: 1916, Action: 0, Reward: 0
Step: 1917, Action: 0, Reward: 0
Step: 1918, Action: 0, Reward: 0
Step: 1919, Action: 0, Reward: 0
Step: 1920, Action: 0, Reward: 0
Step: 1921, Action: 0, Reward: 0
Step: 1922, Action: 0, Reward: 0
Step: 1923, Action: 0, Reward: 0
Step: 1924, Action: 0, Reward: 0
Step: 1925, Action: 0, Reward: 0
Step: 1926, Action: 0, Reward: 0
Step: 1927, Action: 0, Reward: 0
Step: 1928, Action: 0, Reward: 0
Step: 1929, Action: 0, Reward: 0
Step: 1930

Step: 2336, Action: 0, Reward: 0
Step: 2337, Action: 0, Reward: 0
Step: 2338, Action: 0, Reward: 0
Step: 2339, Action: 0, Reward: 0
Step: 2340, Action: 0, Reward: 0
Step: 2341, Action: 0, Reward: 0
Step: 2342, Action: 0, Reward: 0
Step: 2343, Action: 0, Reward: 0
Step: 2344, Action: 0, Reward: 0
Step: 2345, Action: 0, Reward: 0
Step: 2346, Action: 0, Reward: 0
Step: 2347, Action: 0, Reward: 0
Step: 2348, Action: 0, Reward: 0
Step: 2349, Action: 0, Reward: 0
Step: 2350, Action: 0, Reward: 0
Step: 2351, Action: 0, Reward: 0
Step: 2352, Action: 0, Reward: 0
Step: 2353, Action: 0, Reward: 0
Step: 2354, Action: 0, Reward: 0
Step: 2355, Action: 0, Reward: 0
Step: 2356, Action: 0, Reward: 0
Step: 2357, Action: 0, Reward: 0
Step: 2358, Action: 0, Reward: 0
Step: 2359, Action: 0, Reward: 0
Step: 2360, Action: 0, Reward: 0
Step: 2361, Action: 0, Reward: 0
Step: 2362, Action: 0, Reward: 0
Step: 2363, Action: 0, Reward: 0
Step: 2364, Action: 0, Reward: 0
Step: 2365, Action: 0, Reward: 0
Step: 2366

Step: 2770, Action: 2, Reward: 0
Step: 2771, Action: 2, Reward: 0
Step: 2772, Action: 2, Reward: 0
Step: 2773, Action: 2, Reward: 0
Step: 2774, Action: 2, Reward: 0
Step: 2775, Action: 2, Reward: 0
Step: 2776, Action: 2, Reward: 0
Step: 2777, Action: 2, Reward: 0
Step: 2778, Action: 2, Reward: 0
Step: 2779, Action: 2, Reward: 0
Step: 2780, Action: 2, Reward: 0
Step: 2781, Action: 2, Reward: 0
Step: 2782, Action: 0, Reward: 0
Step: 2783, Action: 0, Reward: 0
Step: 2784, Action: 2, Reward: 0
Step: 2785, Action: 2, Reward: 0
Step: 2786, Action: 2, Reward: 0
Step: 2787, Action: 2, Reward: 0
Step: 2788, Action: 2, Reward: 0
Step: 2789, Action: 2, Reward: 0
Step: 2790, Action: 2, Reward: 0
Step: 2791, Action: 2, Reward: 0
Step: 2792, Action: 2, Reward: 0
Step: 2793, Action: 2, Reward: 0
Step: 2794, Action: 2, Reward: 0
Step: 2795, Action: 2, Reward: 0
Step: 2796, Action: 2, Reward: 0
Step: 2797, Action: 2, Reward: 0
Step: 2798, Action: 2, Reward: 0
Step: 2799, Action: 2, Reward: 0
Step: 2800

Step: 3206, Action: 2, Reward: 0
Step: 3207, Action: 2, Reward: 0
Step: 3208, Action: 2, Reward: 0
Step: 3209, Action: 2, Reward: 0
Step: 3210, Action: 2, Reward: 0
Step: 3211, Action: 2, Reward: 0
Step: 3212, Action: 2, Reward: 0
Step: 3213, Action: 2, Reward: 0
Step: 3214, Action: 2, Reward: 0
Step: 3215, Action: 2, Reward: 0
Step: 3216, Action: 2, Reward: 0
Step: 3217, Action: 2, Reward: 0
Step: 3218, Action: 2, Reward: 0
Step: 3219, Action: 2, Reward: 0
Step: 3220, Action: 2, Reward: 0
Step: 3221, Action: 2, Reward: 0
Step: 3222, Action: 2, Reward: 0
Step: 3223, Action: 2, Reward: 0
Step: 3224, Action: 2, Reward: 0
Step: 3225, Action: 2, Reward: 0
Step: 3226, Action: 2, Reward: 0
Step: 3227, Action: 2, Reward: 0
Step: 3228, Action: 2, Reward: 0
Step: 3229, Action: 2, Reward: 0
Step: 3230, Action: 2, Reward: 0
Step: 3231, Action: 2, Reward: 0
Step: 3232, Action: 2, Reward: 0
Step: 3233, Action: 2, Reward: 0
Step: 3234, Action: 2, Reward: 0
Step: 3235, Action: 2, Reward: 0
Step: 3236

Step: 3644, Action: 0, Reward: 0
Step: 3645, Action: 0, Reward: 0
Step: 3646, Action: 0, Reward: 0
Step: 3647, Action: 0, Reward: 0
Step: 3648, Action: 0, Reward: 0
Step: 3649, Action: 0, Reward: 0
Step: 3650, Action: 0, Reward: 0
Step: 3651, Action: 2, Reward: 0
Step: 3652, Action: 2, Reward: 0
Step: 3653, Action: 2, Reward: 0
Step: 3654, Action: 0, Reward: 0
Step: 3655, Action: 2, Reward: 0
Step: 3656, Action: 0, Reward: 0
Step: 3657, Action: 0, Reward: 0
Step: 3658, Action: 0, Reward: 0
Step: 3659, Action: 0, Reward: 0
Step: 3660, Action: 0, Reward: 0
Step: 3661, Action: 0, Reward: 0
Step: 3662, Action: 0, Reward: 0
Step: 3663, Action: 0, Reward: 0
Step: 3664, Action: 0, Reward: 0
Step: 3665, Action: 0, Reward: 0
Step: 3666, Action: 0, Reward: 0
Step: 3667, Action: 0, Reward: 0
Step: 3668, Action: 0, Reward: 0
Step: 3669, Action: 0, Reward: 0
Step: 3670, Action: 0, Reward: 0
Step: 3671, Action: 0, Reward: 0
Step: 3672, Action: 0, Reward: 0
Step: 3673, Action: 0, Reward: 0
Step: 3674

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


In [4]:
#ensemble learning single agent
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO, DQN, A2C
from sklearn.preprocessing import StandardScaler
from collections import Counter

# Function to load and normalize data
def load_and_normalize_data(train_file, test_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
    df_test['timestamp'] = pd.to_datetime(df_test['timestamp'])

    scaler = StandardScaler()
    columns_to_normalize = ['open', 'high', 'low', 'close', 'volume']

    df_train[columns_to_normalize] = scaler.fit_transform(df_train[columns_to_normalize])
    df_test[columns_to_normalize] = scaler.transform(df_test[columns_to_normalize])

    return df_train, df_test, scaler

# Single-Agent Trading Environment
class SingleAgentEnv(gym.Env):
    def __init__(self, data, window_size=10, initial_balance=10000, scaler=None):
        super(SingleAgentEnv, self).__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0 = neutral, 1 = long, -1 = short
        self.trades = []
        self.entry_price = 0
        self.log = []  # Log for detailed reporting
        self.scaler = scaler  # Store the scaler for inverse scaling

        # Action space: hold (0), buy (1), sell (2)
        self.action_space = spaces.Discrete(3)

        # Observation space: Stock prices (open, high, low, close, volume)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(window_size, 5), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 0
        self.position = 0
        self.balance = self.initial_balance
        self.trades = []
        self.entry_price = 0
        self.log = []  # Reset log

        # Log initial holdings
        self.log.append(f"Agent starts with 0 holdings (neutral position), Initial Balance: {self.balance}")
        return self._get_observation(), {}

    def _get_observation(self):
        obs = self.data.iloc[self.current_step:self.current_step + self.window_size][['open', 'high', 'low', 'close', 'volume']].values.astype(np.float32)
        return obs

    def inverse_scale_price(self, price):
        # Inverse transform the scaled price to get the original value (considering the 'close' column)
        inverse_scaled = self.scaler.inverse_transform([[0, 0, 0, price, 0]])[0][3]
        return inverse_scaled

    def step(self, action):
        reward = 0
        current_price = self.data.iloc[self.current_step]['close']
        original_price = self.inverse_scale_price(current_price)  # Get original (inverse-scaled) price

        # If agent buys
        if action == 1:
            if self.position == 0:  # Only buy if neutral
                self.position = 1
                self.entry_price = current_price
                original_entry_price = self.inverse_scale_price(current_price)
                self.log.append(f"Agent buys at {original_entry_price}, Current Balance: {self.balance}, Holdings: 1 Long")
            elif self.position == -1:  # Close short position
                reward = self.entry_price - current_price  # Scaled reward
                original_reward = self.inverse_scale_price(self.entry_price) - original_price
                self.balance += original_reward
                self.position = 0
                self.trades.append(original_reward)
                self.log.append(f"Agent closes short at {original_price}, profit: {original_reward}, Current Balance: {self.balance}, Holdings: 0")

        # If agent sells
        elif action == 2:
            if self.position == 0:  # Only sell if neutral
                self.position = -1
                self.entry_price = current_price
                original_entry_price = self.inverse_scale_price(current_price)
                self.log.append(f"Agent sells (short) at {original_entry_price}, Current Balance: {self.balance}, Holdings: 1 Short")
            elif self.position == 1:  # Close long position
                reward = current_price - self.entry_price  # Scaled reward
                original_reward = original_price - self.inverse_scale_price(self.entry_price)
                self.balance += original_reward
                self.position = 0
                self.trades.append(original_reward)
                self.log.append(f"Agent closes long at {original_price}, profit: {original_reward}, Current Balance: {self.balance}, Holdings: 0")

        self.current_step += 1
        terminated = self.current_step >= len(self.data) - self.window_size
        truncated = False
        return self._get_observation(), reward, terminated, truncated, {}

    def generate_report(self):
        print("\n--- Agent Report ---")
        for log in self.log:
            print(log)
        print(f"Final Balance: {self.balance}")
        print(f"Total Profit: {self.balance - self.initial_balance}")
        print(f"Number of Holdings (Long): {1 if self.position == 1 else 0}, Short: {1 if self.position == -1 else 0}")
        print("-" * 40)

# Function to calculate metrics
def calculate_metrics(trades, initial_balance, final_balance):
    # Total Profit
    total_profit = final_balance - initial_balance

    # Cumulative Return
    cumulative_return = (final_balance - initial_balance) / initial_balance

    # Win Rate
    positive_trades = [trade for trade in trades if trade > 0]
    win_rate = len(positive_trades) / len(trades) if trades else 0

    # Profit Factor
    gross_profit = sum(trade for trade in trades if trade > 0)
    gross_loss = -sum(trade for trade in trades if trade < 0)
    profit_factor = gross_profit / gross_loss if gross_loss != 0 else np.inf

    # Sharpe Ratio
    returns = np.array(trades)
    mean_return = np.mean(returns)
    std_return = np.std(returns)
    sharpe_ratio = mean_return / std_return if std_return != 0 else 0

    # Sortino Ratio (using only downside standard deviation)
    downside_std = np.std([min(0, r) for r in returns])
    sortino_ratio = mean_return / downside_std if downside_std != 0 else 0

    # Maximum Drawdown
    balance_series = np.cumsum([initial_balance] + trades)  # Series of balance over time
    peak_balance = np.maximum.accumulate(balance_series)
    drawdowns = (peak_balance - balance_series) / peak_balance
    max_drawdown = np.max(drawdowns) if drawdowns.size > 0 else 0

    metrics = {
        "Total Profit": total_profit,
        "Cumulative Return": cumulative_return,
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Maximum Drawdown": max_drawdown
    }
    return metrics

# Ensemble model function
def ensemble_predict(actions):
    # Convert numpy arrays to integers for each action
    actions = [int(action) for action in actions]
    # Perform a majority vote among the actions (hold, buy, sell)
    action_counts = Counter(actions)
    return action_counts.most_common(1)[0][0]

# Train and evaluate the ensemble model
def train_and_evaluate():
    # Load and normalize the data
    train_file = 'NVDA_TRAINING.csv'
    test_file = 'NVDA_TESTING.csv'
    df_train_normalized, df_test_normalized, scaler = load_and_normalize_data(train_file, test_file)

    # Create the environment using the training data
    env_train = SingleAgentEnv(df_train_normalized, window_size=10, scaler=scaler)

    # Initialize each model and train them separately
    ppo_model = PPO("MlpPolicy", env_train, verbose=1)
    dqn_model = DQN("MlpPolicy", env_train, verbose=1)
    a2c_model = A2C("MlpPolicy", env_train, verbose=1)

    # Train each model
    ppo_model.learn(total_timesteps=50000)
    dqn_model.learn(total_timesteps=50000)
    a2c_model.learn(total_timesteps=50000)

    # Test the ensemble model on the training data
    obs, _ = env_train.reset()
    done = False
    while not done:
        # Get predictions from each model
        ppo_action, _ = ppo_model.predict(obs)
        dqn_action, _ = dqn_model.predict(obs)
        a2c_action, _ = a2c_model.predict(obs)

        # Aggregate the actions through majority voting
        final_action = ensemble_predict([ppo_action, dqn_action, a2c_action])

        # Step the environment with the final action
        obs, reward, done, truncated, info = env_train.step(final_action)

    # Calculate and display training metrics
    training_metrics = calculate_metrics(env_train.trades, env_train.initial_balance, env_train.balance)
    print("\n--- Training Metrics ---")
    for metric, value in training_metrics.items():
        print(f"{metric}: {value}")

    # Test the ensemble model on the testing data
    env_test = SingleAgentEnv(df_test_normalized, window_size=10, scaler=scaler)
    obs, _ = env_test.reset()
    done = False
    while not done:
        # Get predictions from each model
        ppo_action, _ = ppo_model.predict(obs)
        dqn_action, _ = dqn_model.predict(obs)
        a2c_action, _ = a2c_model.predict(obs)

        # Aggregate the actions through majority voting
        final_action = ensemble_predict([ppo_action, dqn_action, a2c_action])

        # Step the environment with the final action
        obs, reward, done, truncated, info = env_test.step(final_action)

    # Generate report for the testing session
    env_test.generate_report()

    # Calculate and display testing metrics
    testing_metrics = calculate_metrics(env_test.trades, env_test.initial_balance, env_test.balance)
    print("\n--- Testing Metrics ---")
    for metric, value in testing_metrics.items():
        print(f"{metric}: {value}")

# Run the training and evaluation
train_and_evaluate()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------
| time/              |      |
|    fps             | 1811 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1559        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.010276195 |
|    clip_fraction        | 0.041       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -34.7       |
|   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.93e+03    |
|    ep_rew_mean          | -0.16       |
| time/                   |             |
|    fps                  | 1349        |
|    iterations           | 11          |
|    time_elapsed         | 16          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.008465815 |
|    clip_fraction        | 0.0745      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.06       |
|    explained_variance   | -0.0629     |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0336     |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.0106     |
|    value_loss           | 0.0012      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.93e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.93e+03    |
|    ep_rew_mean          | 2.7         |
| time/                   |             |
|    fps                  | 1330        |
|    iterations           | 21          |
|    time_elapsed         | 32          |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.006683612 |
|    clip_fraction        | 0.0872      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.931      |
|    explained_variance   | 0.0774      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0353     |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.00763    |
|    value_loss           | 0.00424     |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.93

------------------------------------
| time/                 |          |
|    fps                | 979      |
|    iterations         | 800      |
|    time_elapsed       | 4        |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -0.772   |
|    explained_variance | -37.5    |
|    learning_rate      | 0.0007   |
|    n_updates          | 799      |
|    policy_loss        | -0.0116  |
|    value_loss         | 0.000552 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 979      |
|    iterations         | 900      |
|    time_elapsed       | 4        |
|    total_timesteps    | 4500     |
| train/                |          |
|    entropy_loss       | -0.867   |
|    explained_variance | -2.76    |
|    learning_rate      | 0.0007   |
|    n_updates          | 899      |
|    policy_loss        | 0.154    |
|    value_loss         | 0.0517   |
-

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.169   |
| time/                 |          |
|    fps                | 983      |
|    iterations         | 2300     |
|    time_elapsed       | 11       |
|    total_timesteps    | 11500    |
| train/                |          |
|    entropy_loss       | -0.601   |
|    explained_variance | -149     |
|    learning_rate      | 0.0007   |
|    n_updates          | 2299     |
|    policy_loss        | -0.00385 |
|    value_loss         | 0.000331 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.169   |
| time/                 |          |
|    fps                | 984      |
|    iterations         | 2400     |
|    time_elapsed       | 12       |
|    total_timesteps    | 12000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.101    |
| time/                 |          |
|    fps                | 985      |
|    iterations         | 3600     |
|    time_elapsed       | 18       |
|    total_timesteps    | 18000    |
| train/                |          |
|    entropy_loss       | -0.723   |
|    explained_variance | 0.367    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3599     |
|    policy_loss        | 0.000747 |
|    value_loss         | 3.58e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.101    |
| time/                 |          |
|    fps                | 984      |
|    iterations         | 3700     |
|    time_elapsed       | 18       |
|    total_timesteps    | 18500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.0836   |
| time/                 |           |
|    fps                | 975       |
|    iterations         | 4900      |
|    time_elapsed       | 25        |
|    total_timesteps    | 24500     |
| train/                |           |
|    entropy_loss       | -0.258    |
|    explained_variance | -3.44     |
|    learning_rate      | 0.0007    |
|    n_updates          | 4899      |
|    policy_loss        | -1.27e-05 |
|    value_loss         | 1.18e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.0836  |
| time/                 |          |
|    fps                | 974      |
|    iterations         | 5000     |
|    time_elapsed       | 25       |
|    total_timesteps    | 25000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | -0.0836  |
| time/                 |          |
|    fps                | 922      |
|    iterations         | 6200     |
|    time_elapsed       | 33       |
|    total_timesteps    | 31000    |
| train/                |          |
|    entropy_loss       | -0.0563  |
|    explained_variance | -731     |
|    learning_rate      | 0.0007   |
|    n_updates          | 6199     |
|    policy_loss        | 3.78e-05 |
|    value_loss         | 2.12e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | -0.0836   |
| time/                 |           |
|    fps                | 922       |
|    iterations         | 6300      |
|    time_elapsed       | 34        |
|    total_timesteps    | 31500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.401    |
| time/                 |          |
|    fps                | 926      |
|    iterations         | 7500     |
|    time_elapsed       | 40       |
|    total_timesteps    | 37500    |
| train/                |          |
|    entropy_loss       | -0.679   |
|    explained_variance | -0.601   |
|    learning_rate      | 0.0007   |
|    n_updates          | 7499     |
|    policy_loss        | -0.00474 |
|    value_loss         | 0.000313 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.93e+03  |
|    ep_rew_mean        | 0.401     |
| time/                 |           |
|    fps                | 927       |
|    iterations         | 7600      |
|    time_elapsed       | 40        |
|    total_timesteps    | 38000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.655    |
| time/                 |          |
|    fps                | 935      |
|    iterations         | 8800     |
|    time_elapsed       | 47       |
|    total_timesteps    | 44000    |
| train/                |          |
|    entropy_loss       | -1.07    |
|    explained_variance | -9.38    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8799     |
|    policy_loss        | -0.00475 |
|    value_loss         | 2.99e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.93e+03 |
|    ep_rew_mean        | 0.655    |
| time/                 |          |
|    fps                | 935      |
|    iterations         | 8900     |
|    time_elapsed       | 47       |
|    total_timesteps    | 44500    |
| train/                |          |
|


--- Training Metrics ---
Total Profit: 62.6709000000028
Cumulative Return: 0.0062670900000002805
Win Rate: 0.5440666204024983
Profit Factor: 1.8685629626344507
Sharpe Ratio: 0.14661302900559006
Sortino Ratio: 0.3112348782986499
Maximum Drawdown: 0.00038029986651420095

--- Agent Report ---
Agent starts with 0 holdings (neutral position), Initial Balance: 10000
Agent buys at 29.793, Current Balance: 10000, Holdings: 1 Long
Agent closes long at 30.375999999999998, profit: 0.5829999999999984, Current Balance: 10000.583, Holdings: 0
Agent buys at 30.344, Current Balance: 10000.583, Holdings: 1 Long
Agent closes long at 29.72, profit: -0.6240000000000023, Current Balance: 9999.959, Holdings: 0
Agent buys at 28.627000000000002, Current Balance: 9999.959, Holdings: 1 Long
Agent closes long at 28.781, profit: 0.15399999999999636, Current Balance: 10000.113000000001, Holdings: 0
Agent buys at 28.5055, Current Balance: 10000.113000000001, Holdings: 1 Long
Agent closes long at 27.945, profit: -0