In [2]:
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack, VecVideoRecorder
from stable_baselines3.common.evaluation import evaluate_policy
import gymnasium
import ale_py
import os
import optuna 

In [3]:
def obj(trial):
    # Environment creation
    env = make_atari_env("SpaceInvadersNoFrameskip-v4",n_envs=4,seed=0)
    # Stack 4 frames
    env = VecFrameStack(env, n_stack=4)

    learning_starts = trial.suggest_int("learning_starts", 1000, 100_000)
    target_update_interval = trial.suggest_int("target_update_interval", 1, 1000)
    exploration_fraction = trial.suggest_float("exploration_fraction", 0.1, 0.5)
    exploration_final_eps = trial.suggest_float("exploration_final_eps", 0.01, 0.1)
    batch_size = trial.suggest_int("batch_size", 32, 128)
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-3)
    gradient_steps = trial.suggest_int("gradient_steps", 1, 10)
    buffer_size = trial.suggest_int("buffer_size", 1000, 100_000)
    train_freq = trial.suggest_int("train_freq", 1, 10)

    # Create the agent and train it
    agent = DQN("CnnPolicy", env, verbose=1,buffer_size=buffer_size, learning_starts=learning_starts, train_freq=train_freq, 
                target_update_interval=target_update_interval, exploration_fraction=exploration_fraction, 
                exploration_final_eps=exploration_final_eps, batch_size=batch_size, learning_rate=learning_rate, 
                gradient_steps=gradient_steps, optimize_memory_usage=False)
    agent.learn(total_timesteps=10_000)

    mean_reward, _= evaluate_policy(agent, env, n_eval_episodes=10, deterministic=True)
    env.close()
    return mean_reward

In [4]:
study = optuna.create_study(direction="maximize")
study.optimize(obj, n_trials=100, n_jobs=1)

print("Best hyperparameters:", study.best_params)

[I 2024-11-27 09:43:41,746] A new study created in memory with name: no-name-90357ff5-cd61-4046-97e5-74c55de8c728
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-3)


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.712    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 204      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.638    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 223      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.488    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:44:56,159] Trial 0 finished with value: 18.5 and parameters: {'learning_starts': 32476, 'target_update_interval': 450, 'exploration_fraction': 0.4740410447777754, 'exploration_final_eps': 0.09688922499719223, 'batch_size': 99, 'learning_rate': 2.120879758601238e-05, 'gradient_steps': 10, 'buffer_size': 44364, 'train_freq': 9}. Best is trial 0 with value: 18.5.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-3)


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.701    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 194      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.624    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 197      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.468    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:46:02,043] Trial 1 finished with value: 18.5 and parameters: {'learning_starts': 36166, 'target_update_interval': 659, 'exploration_fraction': 0.4600726076597029, 'exploration_final_eps': 0.0887979820602534, 'batch_size': 123, 'learning_rate': 1.3383483450661942e-05, 'gradient_steps': 7, 'buffer_size': 91095, 'train_freq': 4}. Best is trial 0 with value: 18.5.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.636    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 227      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.543    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 227      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.354    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:47:06,359] Trial 2 finished with value: 284.5 and parameters: {'learning_starts': 28506, 'target_update_interval': 957, 'exploration_fraction': 0.3944277513990532, 'exploration_final_eps': 0.051610585800878794, 'batch_size': 46, 'learning_rate': 0.00010940794353500171, 'gradient_steps': 7, 'buffer_size': 24526, 'train_freq': 1}. Best is trial 2 with value: 284.5.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.328    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 191      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.155    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 196      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0522   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:48:11,000] Trial 3 finished with value: 18.5 and parameters: {'learning_starts': 84542, 'target_update_interval': 602, 'exploration_fraction': 0.21320295050990473, 'exploration_final_eps': 0.05223155135317888, 'batch_size': 119, 'learning_rate': 2.7226483390603868e-05, 'gradient_steps': 5, 'buffer_size': 90056, 'train_freq': 5}. Best is trial 2 with value: 284.5.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.652    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 195      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.563    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 197      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.381    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:49:14,270] Trial 4 finished with value: 18.5 and parameters: {'learning_starts': 95927, 'target_update_interval': 988, 'exploration_fraction': 0.4217085776079653, 'exploration_final_eps': 0.029412153223371727, 'batch_size': 44, 'learning_rate': 2.4801817302728295e-05, 'gradient_steps': 4, 'buffer_size': 42849, 'train_freq': 4}. Best is trial 2 with value: 284.5.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.196    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 194      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.0998   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 200      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0998   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:50:07,425] Trial 5 finished with value: 18.5 and parameters: {'learning_starts': 99737, 'target_update_interval': 164, 'exploration_fraction': 0.16938607873604505, 'exploration_final_eps': 0.09983533308443855, 'batch_size': 54, 'learning_rate': 0.000967882925166953, 'gradient_steps': 10, 'buffer_size': 38558, 'train_freq': 2}. Best is trial 2 with value: 284.5.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.646    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 387      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.555    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 402      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.371    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:50:47,954] Trial 6 finished with value: 284.5 and parameters: {'learning_starts': 81586, 'target_update_interval': 63, 'exploration_fraction': 0.4138555708199648, 'exploration_final_eps': 0.03103186385852985, 'batch_size': 64, 'learning_rate': 0.00010860504577407188, 'gradient_steps': 1, 'buffer_size': 44938, 'train_freq': 5}. Best is trial 2 with value: 284.5.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.566    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 190      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.455    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 195      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.228    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:51:45,009] Trial 7 finished with value: 18.5 and parameters: {'learning_starts': 75113, 'target_update_interval': 152, 'exploration_fraction': 0.34480795201681036, 'exploration_final_eps': 0.010245266608774175, 'batch_size': 100, 'learning_rate': 5.272618728621562e-05, 'gradient_steps': 7, 'buffer_size': 40164, 'train_freq': 6}. Best is trial 2 with value: 284.5.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.72     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 231      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.648    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 233      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.502    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:52:55,181] Trial 8 finished with value: 343.0 and parameters: {'learning_starts': 79397, 'target_update_interval': 823, 'exploration_fraction': 0.4979782775516671, 'exploration_final_eps': 0.07656543126488856, 'batch_size': 47, 'learning_rate': 0.0006381165544489938, 'gradient_steps': 4, 'buffer_size': 42612, 'train_freq': 4}. Best is trial 8 with value: 343.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.461    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 196      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.322    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 198      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0412   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:54:06,098] Trial 9 finished with value: 321.0 and parameters: {'learning_starts': 86647, 'target_update_interval': 425, 'exploration_fraction': 0.2746566394351989, 'exploration_final_eps': 0.020359536854671526, 'batch_size': 127, 'learning_rate': 1.62090017010582e-05, 'gradient_steps': 1, 'buffer_size': 9670, 'train_freq': 6}. Best is trial 8 with value: 343.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0707   |
| time/               |          |
|    episodes         | 4        |
|    fps              | 195      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.0707   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 204      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0707   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:55:10,357] Trial 10 finished with value: 187.5 and parameters: {'learning_starts': 59239, 'target_update_interval': 815, 'exploration_fraction': 0.11256082893075539, 'exploration_final_eps': 0.07070242423341219, 'batch_size': 33, 'learning_rate': 0.0008997749629668368, 'gradient_steps': 3, 'buffer_size': 65357, 'train_freq': 9}. Best is trial 8 with value: 343.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.495    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 189      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.366    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 199      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.102    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:56:16,318] Trial 11 finished with value: 20.0 and parameters: {'learning_starts': 64343, 'target_update_interval': 342, 'exploration_fraction': 0.2784798357238477, 'exploration_final_eps': 0.07007420425407954, 'batch_size': 81, 'learning_rate': 0.000307329301275862, 'gradient_steps': 1, 'buffer_size': 6356, 'train_freq': 7}. Best is trial 8 with value: 343.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.508    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 201      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.382    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 209      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.126    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:57:27,681] Trial 12 finished with value: 313.0 and parameters: {'learning_starts': 10811, 'target_update_interval': 356, 'exploration_fraction': 0.28443999813574794, 'exploration_final_eps': 0.07513077260324894, 'batch_size': 74, 'learning_rate': 0.0002652969077612543, 'gradient_steps': 3, 'buffer_size': 10983, 'train_freq': 7}. Best is trial 8 with value: 343.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.562    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 194      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.449    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 200      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.221    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:58:38,443] Trial 13 finished with value: 316.0 and parameters: {'learning_starts': 72318, 'target_update_interval': 750, 'exploration_fraction': 0.3330093271448232, 'exploration_final_eps': 0.03514641430520173, 'batch_size': 94, 'learning_rate': 0.0003363007737579405, 'gradient_steps': 2, 'buffer_size': 72267, 'train_freq': 3}. Best is trial 8 with value: 343.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.379    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 239      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.22     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 242      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0785   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 09:59:47,473] Trial 14 finished with value: 313.0 and parameters: {'learning_starts': 48681, 'target_update_interval': 532, 'exploration_fraction': 0.22434119968761468, 'exploration_final_eps': 0.07846828534355584, 'batch_size': 115, 'learning_rate': 5.9465168133663324e-05, 'gradient_steps': 5, 'buffer_size': 22662, 'train_freq': 7}. Best is trial 8 with value: 343.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.571    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 205      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.461    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 208      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.238    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:00:57,562] Trial 15 finished with value: 343.0 and parameters: {'learning_starts': 89048, 'target_update_interval': 830, 'exploration_fraction': 0.348590127341943, 'exploration_final_eps': 0.011467186912097915, 'batch_size': 84, 'learning_rate': 1.1730665532759662e-05, 'gradient_steps': 3, 'buffer_size': 61884, 'train_freq': 10}. Best is trial 8 with value: 343.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.714    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 163      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.641    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 172      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.491    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:02:04,737] Trial 16 finished with value: 99.0 and parameters: {'learning_starts': 51069, 'target_update_interval': 825, 'exploration_fraction': 0.49630209458314445, 'exploration_final_eps': 0.06111050317101938, 'batch_size': 80, 'learning_rate': 1.0249514487542736e-05, 'gradient_steps': 3, 'buffer_size': 63329, 'train_freq': 9}. Best is trial 8 with value: 343.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.603    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 189      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.502    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 198      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.295    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:03:11,069] Trial 17 finished with value: 274.0 and parameters: {'learning_starts': 90932, 'target_update_interval': 886, 'exploration_fraction': 0.36614951180832883, 'exploration_final_eps': 0.03976489575413878, 'batch_size': 60, 'learning_rate': 0.0005007228929750895, 'gradient_steps': 6, 'buffer_size': 57026, 'train_freq': 10}. Best is trial 8 with value: 343.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.683    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 279      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.602    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 251      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.437    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:04:09,412] Trial 18 finished with value: 257.5 and parameters: {'learning_starts': 69055, 'target_update_interval': 724, 'exploration_fraction': 0.4477255679362988, 'exploration_final_eps': 0.06144887334771218, 'batch_size': 70, 'learning_rate': 0.00016242040271536651, 'gradient_steps': 4, 'buffer_size': 84261, 'train_freq': 3}. Best is trial 8 with value: 343.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.632    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 222      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.538    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 245      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.346    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:06:56,301] Trial 19 finished with value: 185.5 and parameters: {'learning_starts': 4078, 'target_update_interval': 602, 'exploration_fraction': 0.3751694761742459, 'exploration_final_eps': 0.08752890641949711, 'batch_size': 88, 'learning_rate': 4.1504987659289085e-05, 'gradient_steps': 4, 'buffer_size': 74244, 'train_freq': 8}. Best is trial 8 with value: 343.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.565    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 202      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.454    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 204      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.227    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:08:07,792] Trial 20 finished with value: 362.5 and parameters: {'learning_starts': 78195, 'target_update_interval': 877, 'exploration_fraction': 0.33246490132842166, 'exploration_final_eps': 0.04387032804807812, 'batch_size': 110, 'learning_rate': 0.00017284697205761188, 'gradient_steps': 8, 'buffer_size': 29580, 'train_freq': 10}. Best is trial 20 with value: 362.5.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.537    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 215      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.419    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 224      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.178    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:09:19,546] Trial 21 finished with value: 343.0 and parameters: {'learning_starts': 78762, 'target_update_interval': 877, 'exploration_fraction': 0.31272233918219405, 'exploration_final_eps': 0.043194445663515095, 'batch_size': 111, 'learning_rate': 0.000169888501045415, 'gradient_steps': 9, 'buffer_size': 28790, 'train_freq': 10}. Best is trial 20 with value: 362.5.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.37     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 252      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.208    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 240      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0133   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:10:26,190] Trial 22 finished with value: 284.5 and parameters: {'learning_starts': 56660, 'target_update_interval': 729, 'exploration_fraction': 0.23669208105273798, 'exploration_final_eps': 0.013262695818152996, 'batch_size': 107, 'learning_rate': 0.0006276486117128723, 'gradient_steps': 8, 'buffer_size': 55370, 'train_freq': 10}. Best is trial 20 with value: 362.5.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.551    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 214      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.436    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 218      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.202    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:11:47,358] Trial 23 finished with value: 313.5 and parameters: {'learning_starts': 91132, 'target_update_interval': 914, 'exploration_fraction': 0.3309407120262798, 'exploration_final_eps': 0.017934941963605715, 'batch_size': 89, 'learning_rate': 0.00019338763743248266, 'gradient_steps': 6, 'buffer_size': 32093, 'train_freq': 8}. Best is trial 20 with value: 362.5.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.641    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 189      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.549    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 188      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.363    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:13:10,501] Trial 24 finished with value: 372.0 and parameters: {'learning_starts': 77858, 'target_update_interval': 816, 'exploration_fraction': 0.4121067308719372, 'exploration_final_eps': 0.022796361281554274, 'batch_size': 105, 'learning_rate': 8.891464233399033e-05, 'gradient_steps': 2, 'buffer_size': 50212, 'train_freq': 8}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.66     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 185      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.573    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 206      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.396    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:14:10,303] Trial 25 finished with value: 18.5 and parameters: {'learning_starts': 68034, 'target_update_interval': 671, 'exploration_fraction': 0.4333959243174368, 'exploration_final_eps': 0.02538115351731493, 'batch_size': 108, 'learning_rate': 7.692129911183029e-05, 'gradient_steps': 2, 'buffer_size': 20023, 'train_freq': 8}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.709    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 222      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.635    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 222      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.483    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:15:25,611] Trial 26 finished with value: 362.5 and parameters: {'learning_starts': 43176, 'target_update_interval': 786, 'exploration_fraction': 0.4967624573866659, 'exploration_final_eps': 0.044786698123396085, 'batch_size': 101, 'learning_rate': 0.000521899625916189, 'gradient_steps': 8, 'buffer_size': 35201, 'train_freq': 4}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.635    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 191      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.542    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 196      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.352    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:16:37,859] Trial 27 finished with value: 294.0 and parameters: {'learning_starts': 41894, 'target_update_interval': 984, 'exploration_fraction': 0.39584283614364274, 'exploration_final_eps': 0.04518988986352518, 'batch_size': 102, 'learning_rate': 0.00041162042745115294, 'gradient_steps': 8, 'buffer_size': 15641, 'train_freq': 9}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.687    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 191      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.607    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 194      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.444    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:17:43,317] Trial 28 finished with value: 18.5 and parameters: {'learning_starts': 43365, 'target_update_interval': 746, 'exploration_fraction': 0.4652009182053855, 'exploration_final_eps': 0.03722626624600894, 'batch_size': 114, 'learning_rate': 0.00022949371532155564, 'gradient_steps': 9, 'buffer_size': 33506, 'train_freq': 6}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.697    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 210      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.619    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 211      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.461    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:18:42,731] Trial 29 finished with value: 18.5 and parameters: {'learning_starts': 23147, 'target_update_interval': 568, 'exploration_fraction': 0.4748854229378465, 'exploration_final_eps': 0.048016857106831165, 'batch_size': 94, 'learning_rate': 0.00012727327707419135, 'gradient_steps': 10, 'buffer_size': 50281, 'train_freq': 8}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.644    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 212      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.552    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 215      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.367    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:19:40,357] Trial 30 finished with value: 18.5 and parameters: {'learning_starts': 60327, 'target_update_interval': 660, 'exploration_fraction': 0.3986644965343526, 'exploration_final_eps': 0.060742520198997126, 'batch_size': 102, 'learning_rate': 8.172760223774372e-05, 'gradient_steps': 8, 'buffer_size': 50682, 'train_freq': 9}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.704    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 187      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.628    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 193      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.474    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:20:41,598] Trial 31 finished with value: 18.5 and parameters: {'learning_starts': 78728, 'target_update_interval': 805, 'exploration_fraction': 0.49781049946694467, 'exploration_final_eps': 0.0251339847284229, 'batch_size': 95, 'learning_rate': 0.0006098226942455682, 'gradient_steps': 9, 'buffer_size': 36305, 'train_freq': 4}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.704    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 205      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.628    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 208      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.473    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:21:42,643] Trial 32 finished with value: 18.5 and parameters: {'learning_starts': 51461, 'target_update_interval': 916, 'exploration_fraction': 0.4813794530419384, 'exploration_final_eps': 0.05705498975364698, 'batch_size': 118, 'learning_rate': 0.0004112860811410502, 'gradient_steps': 6, 'buffer_size': 50045, 'train_freq': 3}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.69     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 246      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.611    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 239      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.45     |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:22:45,600] Trial 33 finished with value: 18.5 and parameters: {'learning_starts': 31701, 'target_update_interval': 786, 'exploration_fraction': 0.4436236322331555, 'exploration_final_eps': 0.09182699520012776, 'batch_size': 123, 'learning_rate': 0.0006961051438038267, 'gradient_steps': 7, 'buffer_size': 31152, 'train_freq': 5}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.696    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 194      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.618    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 199      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.46     |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:23:57,147] Trial 34 finished with value: 343.0 and parameters: {'learning_starts': 23110, 'target_update_interval': 875, 'exploration_fraction': 0.45639934365258583, 'exploration_final_eps': 0.08230118390908787, 'batch_size': 33, 'learning_rate': 3.598465382468416e-05, 'gradient_steps': 2, 'buffer_size': 27495, 'train_freq': 2}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.612    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 177      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.513    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 185      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.311    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:25:06,617] Trial 35 finished with value: 362.5 and parameters: {'learning_starts': 74006, 'target_update_interval': 669, 'exploration_fraction': 0.3700050020117286, 'exploration_final_eps': 0.051312254163949834, 'batch_size': 106, 'learning_rate': 0.000478269895036204, 'gradient_steps': 5, 'buffer_size': 45315, 'train_freq': 4}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.613    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 191      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.514    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 199      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.313    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:26:09,686] Trial 36 finished with value: 18.5 and parameters: {'learning_starts': 38391, 'target_update_interval': 672, 'exploration_fraction': 0.371320573052321, 'exploration_final_eps': 0.050536226444692516, 'batch_size': 107, 'learning_rate': 0.00013668672821455648, 'gradient_steps': 5, 'buffer_size': 17301, 'train_freq': 1}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.652    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 214      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.563    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 234      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.381    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:27:14,838] Trial 37 finished with value: 134.0 and parameters: {'learning_starts': 64578, 'target_update_interval': 601, 'exploration_fraction': 0.41514420019960313, 'exploration_final_eps': 0.04422433704347349, 'batch_size': 121, 'learning_rate': 0.00022164181725425972, 'gradient_steps': 8, 'buffer_size': 45034, 'train_freq': 5}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.528    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 188      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.407    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 187      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.16     |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:28:25,471] Trial 38 finished with value: 362.5 and parameters: {'learning_starts': 95848, 'target_update_interval': 467, 'exploration_fraction': 0.30980065279935165, 'exploration_final_eps': 0.03233629161935267, 'batch_size': 128, 'learning_rate': 0.0003801263555463899, 'gradient_steps': 7, 'buffer_size': 25239, 'train_freq': 4}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.437    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 195      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.293    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 192      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0557   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:29:39,401] Trial 39 finished with value: 327.0 and parameters: {'learning_starts': 85198, 'target_update_interval': 958, 'exploration_fraction': 0.25377718370431107, 'exploration_final_eps': 0.05572377789003374, 'batch_size': 99, 'learning_rate': 8.706268177681778e-05, 'gradient_steps': 10, 'buffer_size': 99360, 'train_freq': 2}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.62     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 213      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.522    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 218      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.324    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:30:44,697] Trial 40 finished with value: 284.5 and parameters: {'learning_starts': 74135, 'target_update_interval': 704, 'exploration_fraction': 0.38159474448109537, 'exploration_final_eps': 0.04036914608519614, 'batch_size': 105, 'learning_rate': 0.000479041523708332, 'gradient_steps': 6, 'buffer_size': 37773, 'train_freq': 4}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.538    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 209      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.419    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 212      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.179    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:31:53,892] Trial 41 finished with value: 343.0 and parameters: {'learning_starts': 93669, 'target_update_interval': 458, 'exploration_fraction': 0.3175094083453181, 'exploration_final_eps': 0.02985134521310643, 'batch_size': 127, 'learning_rate': 0.00033816665444013516, 'gradient_steps': 7, 'buffer_size': 23900, 'train_freq': 4}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.522    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 196      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.399    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 199      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.15     |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:33:00,724] Trial 42 finished with value: 18.5 and parameters: {'learning_starts': 99391, 'target_update_interval': 318, 'exploration_fraction': 0.3054887282606647, 'exploration_final_eps': 0.034191937132757325, 'batch_size': 112, 'learning_rate': 0.0008314344284497823, 'gradient_steps': 7, 'buffer_size': 46597, 'train_freq': 5}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.579    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 184      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.471    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 192      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.251    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:34:12,511] Trial 43 finished with value: 321.0 and parameters: {'learning_starts': 82737, 'target_update_interval': 392, 'exploration_fraction': 0.3497155358744208, 'exploration_final_eps': 0.02591275946985731, 'batch_size': 119, 'learning_rate': 0.00026997205824301777, 'gradient_steps': 8, 'buffer_size': 38266, 'train_freq': 3}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.5      |
| time/               |          |
|    episodes         | 4        |
|    fps              | 193      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.372    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 199      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.112    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:35:18,658] Trial 44 finished with value: 18.5 and parameters: {'learning_starts': 95642, 'target_update_interval': 493, 'exploration_fraction': 0.29278094044072256, 'exploration_final_eps': 0.032712395111325926, 'batch_size': 96, 'learning_rate': 0.0004552791354407201, 'gradient_steps': 9, 'buffer_size': 27231, 'train_freq': 6}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.253    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 206      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.0609   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 202      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0217   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:36:28,286] Trial 45 finished with value: 362.5 and parameters: {'learning_starts': 76216, 'target_update_interval': 621, 'exploration_fraction': 0.19791745228346305, 'exploration_final_eps': 0.02172427560788349, 'batch_size': 128, 'learning_rate': 6.262803025766113e-05, 'gradient_steps': 7, 'buffer_size': 41076, 'train_freq': 5}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.658    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 186      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.57     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 191      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.391    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:37:40,213] Trial 46 finished with value: 362.5 and parameters: {'learning_starts': 86370, 'target_update_interval': 295, 'exploration_fraction': 0.4188161309896137, 'exploration_final_eps': 0.05182296273169966, 'batch_size': 111, 'learning_rate': 0.00012645387449410867, 'gradient_steps': 5, 'buffer_size': 54403, 'train_freq': 4}. Best is trial 24 with value: 372.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.438    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 210      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.294    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 215      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.038    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:38:50,825] Trial 47 finished with value: 376.0 and parameters: {'learning_starts': 71837, 'target_update_interval': 237, 'exploration_fraction': 0.2589892072881381, 'exploration_final_eps': 0.03801659272554749, 'batch_size': 117, 'learning_rate': 0.0007658002238434514, 'gradient_steps': 7, 'buffer_size': 2530, 'train_freq': 3}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.457    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 284      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.318    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 312      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0665   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:39:28,671] Trial 48 finished with value: 18.5 and parameters: {'learning_starts': 70375, 'target_update_interval': 30, 'exploration_fraction': 0.25992398394380234, 'exploration_final_eps': 0.06651541510395517, 'batch_size': 116, 'learning_rate': 0.0008380381841724939, 'gradient_steps': 8, 'buffer_size': 1976, 'train_freq': 3}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.223    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 195      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.0478   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 202      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0478   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:40:33,389] Trial 49 finished with value: 18.5 and parameters: {'learning_starts': 63328, 'target_update_interval': 239, 'exploration_fraction': 0.18516715520210908, 'exploration_final_eps': 0.04783743790941131, 'batch_size': 104, 'learning_rate': 0.0009538374626756505, 'gradient_steps': 9, 'buffer_size': 13522, 'train_freq': 1}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0384   |
| time/               |          |
|    episodes         | 4        |
|    fps              | 206      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.0384   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 210      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0384   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:41:43,470] Trial 50 finished with value: 343.0 and parameters: {'learning_starts': 55589, 'target_update_interval': 783, 'exploration_fraction': 0.1423058933871092, 'exploration_final_eps': 0.038381613779681745, 'batch_size': 90, 'learning_rate': 0.0005510171811925282, 'gradient_steps': 6, 'buffer_size': 5433, 'train_freq': 2}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.55     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 200      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.434    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 203      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.199    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:42:41,872] Trial 51 finished with value: 343.0 and parameters: {'learning_starts': 81476, 'target_update_interval': 134, 'exploration_fraction': 0.3217055370531712, 'exploration_final_eps': 0.04160649257195171, 'batch_size': 123, 'learning_rate': 0.0003234532562183199, 'gradient_steps': 7, 'buffer_size': 19246, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.582    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 202      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.475    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 208      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.258    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:43:50,044] Trial 52 finished with value: 18.5 and parameters: {'learning_starts': 74936, 'target_update_interval': 212, 'exploration_fraction': 0.35126314205445697, 'exploration_final_eps': 0.029853380770077258, 'batch_size': 123, 'learning_rate': 0.00038063372165355247, 'gradient_steps': 7, 'buffer_size': 34170, 'train_freq': 3}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.467    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 193      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.33     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 195      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0519   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:44:44,310] Trial 53 finished with value: 20.0 and parameters: {'learning_starts': 46992, 'target_update_interval': 94, 'exploration_fraction': 0.27344026874461974, 'exploration_final_eps': 0.03552467470987618, 'batch_size': 99, 'learning_rate': 0.0007529197662715141, 'gradient_steps': 8, 'buffer_size': 59417, 'train_freq': 7}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.507    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 189      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.38     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 193      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.123    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:45:49,684] Trial 54 finished with value: 284.5 and parameters: {'learning_starts': 66784, 'target_update_interval': 543, 'exploration_fraction': 0.2917116073884356, 'exploration_final_eps': 0.048130633802736, 'batch_size': 110, 'learning_rate': 0.0005615471965757732, 'gradient_steps': 5, 'buffer_size': 46985, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.558    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 209      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.445    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 217      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.215    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:46:55,910] Trial 55 finished with value: 255.5 and parameters: {'learning_starts': 70981, 'target_update_interval': 849, 'exploration_fraction': 0.3361005282337147, 'exploration_final_eps': 0.018571052161288883, 'batch_size': 115, 'learning_rate': 9.821497528246455e-05, 'gradient_steps': 4, 'buffer_size': 9430, 'train_freq': 6}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.406    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 199      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.253    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 205      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0539   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:48:05,322] Trial 56 finished with value: 362.5 and parameters: {'learning_starts': 89906, 'target_update_interval': 932, 'exploration_fraction': 0.24068404699564064, 'exploration_final_eps': 0.053870018064443, 'batch_size': 119, 'learning_rate': 0.000270752339273182, 'gradient_steps': 1, 'buffer_size': 69825, 'train_freq': 5}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.602    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 206      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.5      |
| time/               |          |
|    episodes         | 8        |
|    fps              | 206      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.293    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:49:07,037] Trial 57 finished with value: 18.5 and parameters: {'learning_starts': 78174, 'target_update_interval': 759, 'exploration_fraction': 0.3630932934151049, 'exploration_final_eps': 0.04440520349802614, 'batch_size': 85, 'learning_rate': 0.0006930059854778814, 'gradient_steps': 7, 'buffer_size': 41058, 'train_freq': 3}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.631    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 213      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.536    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 218      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.344    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:50:10,658] Trial 58 finished with value: 18.5 and parameters: {'learning_starts': 98615, 'target_update_interval': 701, 'exploration_fraction': 0.39838941544884426, 'exploration_final_eps': 0.02750685699482429, 'batch_size': 124, 'learning_rate': 0.00015408028150453373, 'gradient_steps': 9, 'buffer_size': 21776, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.654    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 210      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.565    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 216      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.385    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:51:23,363] Trial 59 finished with value: 362.5 and parameters: {'learning_starts': 59715, 'target_update_interval': 425, 'exploration_fraction': 0.43069497449010574, 'exploration_final_eps': 0.014834732654628119, 'batch_size': 76, 'learning_rate': 0.00022923918202250314, 'gradient_steps': 2, 'buffer_size': 26717, 'train_freq': 2}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.613    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 193      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.514    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 193      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.312    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:52:31,098] Trial 60 finished with value: 18.5 and parameters: {'learning_starts': 84373, 'target_update_interval': 855, 'exploration_fraction': 0.38195071220867466, 'exploration_final_eps': 0.022696619374103805, 'batch_size': 108, 'learning_rate': 1.6252640649041727e-05, 'gradient_steps': 6, 'buffer_size': 30241, 'train_freq': 10}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.207    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 211      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.0211   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 213      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0211   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:53:28,137] Trial 61 finished with value: 127.0 and parameters: {'learning_starts': 76776, 'target_update_interval': 576, 'exploration_fraction': 0.18659624558932664, 'exploration_final_eps': 0.021066125350808085, 'batch_size': 128, 'learning_rate': 4.105757946565292e-05, 'gradient_steps': 7, 'buffer_size': 41831, 'train_freq': 5}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.289    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 199      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.106    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 204      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0164   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:54:33,197] Trial 62 finished with value: 18.5 and parameters: {'learning_starts': 88017, 'target_update_interval': 633, 'exploration_fraction': 0.20902612667169962, 'exploration_final_eps': 0.016416917687334373, 'batch_size': 128, 'learning_rate': 5.412277285416221e-05, 'gradient_steps': 8, 'buffer_size': 35373, 'train_freq': 5}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.309    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 188      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.132    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 193      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0325   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:55:37,650] Trial 63 finished with value: 181.0 and parameters: {'learning_starts': 74191, 'target_update_interval': 627, 'exploration_fraction': 0.21184780133364445, 'exploration_final_eps': 0.03249726764252702, 'batch_size': 114, 'learning_rate': 9.397830779407948e-05, 'gradient_steps': 7, 'buffer_size': 42482, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.037    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 193      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.037    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 198      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.037    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:56:44,499] Trial 64 finished with value: 20.0 and parameters: {'learning_starts': 81855, 'target_update_interval': 498, 'exploration_fraction': 0.10184463420533131, 'exploration_final_eps': 0.03702904432478693, 'batch_size': 125, 'learning_rate': 7.726794570467754e-05, 'gradient_steps': 6, 'buffer_size': 48296, 'train_freq': 7}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.464    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 189      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.327    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 193      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0587   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:57:49,573] Trial 65 finished with value: 223.0 and parameters: {'learning_starts': 92790, 'target_update_interval': 766, 'exploration_fraction': 0.2656773966126715, 'exploration_final_eps': 0.058745077229519396, 'batch_size': 120, 'learning_rate': 6.640337976310452e-05, 'gradient_steps': 8, 'buffer_size': 54035, 'train_freq': 6}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0228   |
| time/               |          |
|    episodes         | 4        |
|    fps              | 234      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.0228   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 231      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0228   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:58:49,447] Trial 66 finished with value: 94.0 and parameters: {'learning_starts': 54523, 'target_update_interval': 697, 'exploration_fraction': 0.1419283261946893, 'exploration_final_eps': 0.02282164305149688, 'batch_size': 116, 'learning_rate': 0.00011216439619072624, 'gradient_steps': 3, 'buffer_size': 39837, 'train_freq': 5}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.528    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 199      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.407    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 201      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.162    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 10:59:55,263] Trial 67 finished with value: 18.5 and parameters: {'learning_starts': 66772, 'target_update_interval': 821, 'exploration_fraction': 0.3072816795785187, 'exploration_final_eps': 0.041533399062277994, 'batch_size': 104, 'learning_rate': 6.415196859297703e-05, 'gradient_steps': 7, 'buffer_size': 51227, 'train_freq': 9}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.591    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 186      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.487    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 189      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.274    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:00:58,955] Trial 68 finished with value: 257.5 and parameters: {'learning_starts': 44438, 'target_update_interval': 532, 'exploration_fraction': 0.3597076419023348, 'exploration_final_eps': 0.027863852836823723, 'batch_size': 112, 'learning_rate': 2.9187154008030853e-05, 'gradient_steps': 8, 'buffer_size': 44064, 'train_freq': 3}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.567    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 190      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.456    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 191      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.23     |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:02:10,246] Trial 69 finished with value: 362.5 and parameters: {'learning_starts': 39738, 'target_update_interval': 641, 'exploration_fraction': 0.33279001585266216, 'exploration_final_eps': 0.0468205105562642, 'batch_size': 101, 'learning_rate': 0.000196836667593341, 'gradient_steps': 4, 'buffer_size': 32645, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.485    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 205      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.353    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 211      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0851   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:03:10,716] Trial 70 finished with value: 18.5 and parameters: {'learning_starts': 62185, 'target_update_interval': 888, 'exploration_fraction': 0.29085722512102474, 'exploration_final_eps': 0.010036319810025134, 'batch_size': 42, 'learning_rate': 0.0003885268786156515, 'gradient_steps': 6, 'buffer_size': 25440, 'train_freq': 8}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.645    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 198      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.554    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 206      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.369    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:04:11,221] Trial 71 finished with value: 18.5 and parameters: {'learning_starts': 87609, 'target_update_interval': 307, 'exploration_fraction': 0.40486080126254953, 'exploration_final_eps': 0.04939850082618725, 'batch_size': 110, 'learning_rate': 0.0001151005586051859, 'gradient_steps': 5, 'buffer_size': 53801, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.661    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 185      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.574    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 190      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.397    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:05:18,704] Trial 72 finished with value: 18.5 and parameters: {'learning_starts': 80363, 'target_update_interval': 467, 'exploration_fraction': 0.4281583529750155, 'exploration_final_eps': 0.03916784731502553, 'batch_size': 106, 'learning_rate': 0.00013124232634793078, 'gradient_steps': 5, 'buffer_size': 63641, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.633    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 219      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.539    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 220      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.347    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:06:22,935] Trial 73 finished with value: 18.5 and parameters: {'learning_starts': 85470, 'target_update_interval': 188, 'exploration_fraction': 0.3853082426707751, 'exploration_final_eps': 0.06457750074847272, 'batch_size': 112, 'learning_rate': 0.0005183279351863811, 'gradient_steps': 4, 'buffer_size': 56346, 'train_freq': 5}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.684    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 174      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.603    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 184      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.439    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:07:43,688] Trial 74 finished with value: 211.0 and parameters: {'learning_starts': 72493, 'target_update_interval': 283, 'exploration_fraction': 0.4533437032777794, 'exploration_final_eps': 0.05337363547655799, 'batch_size': 117, 'learning_rate': 4.7206931184696214e-05, 'gradient_steps': 5, 'buffer_size': 59765, 'train_freq': 3}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.706    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 187      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.63     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 196      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.477    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:08:58,442] Trial 75 finished with value: 362.5 and parameters: {'learning_starts': 77128, 'target_update_interval': 268, 'exploration_fraction': 0.48741824325454874, 'exploration_final_eps': 0.05198000549836774, 'batch_size': 98, 'learning_rate': 7.253011595536956e-05, 'gradient_steps': 6, 'buffer_size': 37106, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.652    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 187      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.562    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 193      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.381    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:10:12,971] Trial 76 finished with value: 18.5 and parameters: {'learning_starts': 95463, 'target_update_interval': 377, 'exploration_fraction': 0.41486538426343117, 'exploration_final_eps': 0.044670628713004236, 'batch_size': 121, 'learning_rate': 0.00016308040126457855, 'gradient_steps': 7, 'buffer_size': 66355, 'train_freq': 3}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.673    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 188      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.589    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 187      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.419    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:11:03,361] Trial 77 finished with value: 230.0 and parameters: {'learning_starts': 34986, 'target_update_interval': 120, 'exploration_fraction': 0.4388247189468292, 'exploration_final_eps': 0.05140105428141963, 'batch_size': 92, 'learning_rate': 0.0006230689010606263, 'gradient_steps': 9, 'buffer_size': 52650, 'train_freq': 5}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.542    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 204      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.425    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 206      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.187    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:12:06,156] Trial 78 finished with value: 18.5 and parameters: {'learning_starts': 27845, 'target_update_interval': 722, 'exploration_fraction': 0.3200727740975839, 'exploration_final_eps': 0.031521364716761924, 'batch_size': 126, 'learning_rate': 0.0004534414797151167, 'gradient_steps': 8, 'buffer_size': 45070, 'train_freq': 6}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.689    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 210      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.609    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 207      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.446    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:13:07,770] Trial 79 finished with value: 18.5 and parameters: {'learning_starts': 82767, 'target_update_interval': 421, 'exploration_fraction': 0.4684457755601213, 'exploration_final_eps': 0.0351454567140856, 'batch_size': 109, 'learning_rate': 0.00099824496712877, 'gradient_steps': 7, 'buffer_size': 29818, 'train_freq': 9}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.65     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 214      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.561    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 220      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.378    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:14:21,479] Trial 80 finished with value: 18.5 and parameters: {'learning_starts': 92177, 'target_update_interval': 796, 'exploration_fraction': 0.407637551988714, 'exploration_final_eps': 0.057185926224920115, 'batch_size': 65, 'learning_rate': 0.0008113371568197778, 'gradient_steps': 6, 'buffer_size': 49197, 'train_freq': 3}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.409    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 222      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.257    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 225      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0542   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:15:31,800] Trial 81 finished with value: 255.5 and parameters: {'learning_starts': 87557, 'target_update_interval': 933, 'exploration_fraction': 0.24192997175313824, 'exploration_final_eps': 0.054194772509544215, 'batch_size': 119, 'learning_rate': 0.0002756121791312895, 'gradient_steps': 2, 'buffer_size': 78961, 'train_freq': 5}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.376    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 199      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.215    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 201      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0457   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:16:46,049] Trial 82 finished with value: 362.5 and parameters: {'learning_starts': 90036, 'target_update_interval': 944, 'exploration_fraction': 0.23112686780964595, 'exploration_final_eps': 0.04567695868244028, 'batch_size': 103, 'learning_rate': 0.0002939398681512556, 'gradient_steps': 1, 'buffer_size': 74190, 'train_freq': 5}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.417    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 183      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.267    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 191      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0417   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:17:49,420] Trial 83 finished with value: 18.5 and parameters: {'learning_starts': 89972, 'target_update_interval': 981, 'exploration_fraction': 0.24832688379529305, 'exploration_final_eps': 0.04173172973288668, 'batch_size': 118, 'learning_rate': 0.0003509395715188293, 'gradient_steps': 1, 'buffer_size': 67505, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.298    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 190      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.117    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 198      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0501   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:19:03,934] Trial 84 finished with value: 362.5 and parameters: {'learning_starts': 84061, 'target_update_interval': 914, 'exploration_fraction': 0.20449085000010134, 'exploration_final_eps': 0.050083397220920436, 'batch_size': 112, 'learning_rate': 0.00018096094921533156, 'gradient_steps': 1, 'buffer_size': 86159, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.366    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 186      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.204    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 184      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.059    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:20:15,932] Trial 85 finished with value: 18.5 and parameters: {'learning_starts': 75570, 'target_update_interval': 848, 'exploration_fraction': 0.2245590764498199, 'exploration_final_eps': 0.05897708988154536, 'batch_size': 121, 'learning_rate': 0.00020727690596123471, 'gradient_steps': 1, 'buffer_size': 58726, 'train_freq': 5}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.272    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 200      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.0857   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 206      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0559   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:21:26,729] Trial 86 finished with value: 349.0 and parameters: {'learning_starts': 96213, 'target_update_interval': 891, 'exploration_fraction': 0.19619523648509815, 'exploration_final_eps': 0.05588774158475704, 'batch_size': 114, 'learning_rate': 0.00014563292472213126, 'gradient_steps': 8, 'buffer_size': 71505, 'train_freq': 6}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.128    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 203      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.0636   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 203      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0636   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:22:29,790] Trial 87 finished with value: 140.0 and parameters: {'learning_starts': 71770, 'target_update_interval': 999, 'exploration_fraction': 0.1623000528812727, 'exploration_final_eps': 0.06362184023185982, 'batch_size': 106, 'learning_rate': 0.00024573715851602604, 'gradient_steps': 3, 'buffer_size': 38863, 'train_freq': 10}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.516    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 198      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.391    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 202      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.139    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:23:42,979] Trial 88 finished with value: 343.0 and parameters: {'learning_starts': 80456, 'target_update_interval': 243, 'exploration_fraction': 0.29894024089128657, 'exploration_final_eps': 0.04254022172191804, 'batch_size': 125, 'learning_rate': 0.00044910986385289167, 'gradient_steps': 2, 'buffer_size': 14049, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.627    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 213      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.531    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 206      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.337    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:24:53,948] Trial 89 finished with value: 362.5 and parameters: {'learning_starts': 69625, 'target_update_interval': 358, 'exploration_fraction': 0.390373414323707, 'exploration_final_eps': 0.03696435976800456, 'batch_size': 121, 'learning_rate': 0.0005628498336515523, 'gradient_steps': 4, 'buffer_size': 46280, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.586    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 194      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.479    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 197      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.263    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:25:58,853] Trial 90 finished with value: 20.0 and parameters: {'learning_starts': 78658, 'target_update_interval': 189, 'exploration_fraction': 0.34536655761837476, 'exploration_final_eps': 0.05321234742636632, 'batch_size': 114, 'learning_rate': 0.0003024546849280235, 'gradient_steps': 1, 'buffer_size': 34322, 'train_freq': 7}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.649    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 201      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.558    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 203      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.375    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:27:07,144] Trial 91 finished with value: 284.5 and parameters: {'learning_starts': 48919, 'target_update_interval': 574, 'exploration_fraction': 0.42356439208253993, 'exploration_final_eps': 0.015502170810454893, 'batch_size': 78, 'learning_rate': 0.0002235149048553432, 'gradient_steps': 2, 'buffer_size': 26219, 'train_freq': 2}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.445    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 183      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.302    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 185      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0189   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:28:16,232] Trial 92 finished with value: 18.5 and parameters: {'learning_starts': 53342, 'target_update_interval': 325, 'exploration_fraction': 0.267188583144888, 'exploration_final_eps': 0.018896084091806133, 'batch_size': 72, 'learning_rate': 0.00018201693381107384, 'gradient_steps': 3, 'buffer_size': 23582, 'train_freq': 2}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.597    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 193      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.494    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 200      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.284    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:29:23,761] Trial 93 finished with value: 18.5 and parameters: {'learning_starts': 57857, 'target_update_interval': 546, 'exploration_fraction': 0.3701103268936913, 'exploration_final_eps': 0.013509262503973489, 'batch_size': 80, 'learning_rate': 0.0002450455994744308, 'gradient_steps': 2, 'buffer_size': 21789, 'train_freq': 2}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.47     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 194      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.334    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 193      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0577   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:30:26,605] Trial 94 finished with value: 18.5 and parameters: {'learning_starts': 65404, 'target_update_interval': 440, 'exploration_fraction': 0.28173739250280905, 'exploration_final_eps': 0.012379180368973829, 'batch_size': 86, 'learning_rate': 0.0003726834391620219, 'gradient_steps': 2, 'buffer_size': 29187, 'train_freq': 3}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.657    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 195      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.569    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 202      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.39     |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:31:36,248] Trial 95 finished with value: 284.5 and parameters: {'learning_starts': 73264, 'target_update_interval': 415, 'exploration_fraction': 0.4295407762546624, 'exploration_final_eps': 0.02471658280242526, 'batch_size': 108, 'learning_rate': 0.00011722084088214255, 'gradient_steps': 3, 'buffer_size': 18472, 'train_freq': 1}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.662    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 195      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.575    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 197      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.398    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:32:44,645] Trial 96 finished with value: 284.5 and parameters: {'learning_starts': 98056, 'target_update_interval': 484, 'exploration_fraction': 0.4372544440801639, 'exploration_final_eps': 0.021446955124760237, 'batch_size': 69, 'learning_rate': 0.0004228081248947464, 'gradient_steps': 5, 'buffer_size': 40869, 'train_freq': 5}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.58     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 176      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.473    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 180      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.254    |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:33:46,498] Trial 97 finished with value: 18.5 and parameters: {'learning_starts': 68460, 'target_update_interval': 520, 'exploration_fraction': 0.35493209270298987, 'exploration_final_eps': 0.015239935079274935, 'batch_size': 82, 'learning_rate': 8.989453538225619e-05, 'gradient_steps': 7, 'buffer_size': 2907, 'train_freq': 4}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.674    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 183      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.59     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 187      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.42     |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:34:52,203] Trial 98 finished with value: 257.5 and parameters: {'learning_starts': 76495, 'target_update_interval': 862, 'exploration_fraction': 0.450257808134455, 'exploration_final_eps': 0.028227367737560724, 'batch_size': 75, 'learning_rate': 0.00014709903881520415, 'gradient_steps': 2, 'buffer_size': 36234, 'train_freq': 3}. Best is trial 47 with value: 376.0.


Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.347    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 174      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.18     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 179      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.0472   |
| time/               |          |
|    episodes         | 12      

[I 2024-11-27 11:36:07,762] Trial 99 finished with value: 343.0 and parameters: {'learning_starts': 62085, 'target_update_interval': 835, 'exploration_fraction': 0.2207848438271597, 'exploration_final_eps': 0.04715976502721496, 'batch_size': 77, 'learning_rate': 5.6234627887521216e-05, 'gradient_steps': 8, 'buffer_size': 10413, 'train_freq': 5}. Best is trial 47 with value: 376.0.


Best hyperparameters: {'learning_starts': 71837, 'target_update_interval': 237, 'exploration_fraction': 0.2589892072881381, 'exploration_final_eps': 0.03801659272554749, 'batch_size': 117, 'learning_rate': 0.0007658002238434514, 'gradient_steps': 7, 'buffer_size': 2530, 'train_freq': 3}


 Best hyperparameters: {'learning_starts': 71837, 'target_update_interval': 237, 'exploration_fraction': 0.2589892072881381, 'exploration_final_eps': 0.03801659272554749, 'batch_size': 117, 'learning_rate': 0.0007658002238434514, 'gradient_steps': 7, 'buffer_size': 2530, 'train_freq': 3}

In [6]:
# Environment creation
env = make_atari_env("SpaceInvadersNoFrameskip-v4",n_envs=4,seed=0)
# Stack 4 frames
env = VecFrameStack(env, n_stack=4)

# Create the agent and train it
agent = DQN("CnnPolicy", env, verbose=1,buffer_size=2530, learning_starts=71837, train_freq=3, 
            target_update_interval=237, exploration_fraction=0.2589892072881381, exploration_final_eps=0.03801659272554749,
            batch_size=117, learning_rate=0.0007658002238434514, gradient_steps=7, optimize_memory_usage=False)
agent.learn(total_timesteps=10_000)

# Create a folder to save videos
video_folder = "videos/"
os.makedirs(video_folder, exist_ok=True)

# Wrap environment for video recording
recording_env = VecVideoRecorder(env, video_folder,
                                 record_video_trigger=lambda x: True,
                                 video_length=1000) # Record every 5000 steps
obs = recording_env.reset()

# Record video
for _ in range(1000):
    action, _ = agent.predict(obs, deterministic=True)
    obs, reward, done, info = recording_env.step(action)
    if done.all():
        obs = recording_env.reset()
recording_env.close() 

print(f"Video saved in {video_folder}")

Using cpu device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.438    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 183      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1512     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 128      |
|    exploration_rate | 0.294    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 182      |
|    time_elapsed     | 10       |
|    total_timesteps  | 1900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.16e+03 |
|    ep_rew_mean      | 212      |
|    exploration_rate | 0.038    |
| time/               |          |
|    episodes         | 12      

                                                                          

MoviePy - Done !
MoviePy - video ready c:\workspace\RL-Fun\videos\rl-video-step-0-to-step-1000.mp4
Video saved in videos/


