 # Code was run on https://www.kaggle.com/

# Hyperparameter tuning for DQN using Optuna

In [1]:
!pip install vector-velocity-gym

Collecting vector-velocity-gym
  Downloading vector_velocity_gym-0.1.0-py3-none-any.whl.metadata (4.7 kB)
Collecting pygame (from vector-velocity-gym)
  Downloading pygame-2.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading vector_velocity_gym-0.1.0-py3-none-any.whl (25.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m25.6/25.6 MB[0m [31m69.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pygame-2.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.0/14.0 MB[0m [31m85.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pygame, vector-velocity-gym
Successfully installed pygame-2.6.0 vector-velocity-gym-0.1.0


In [2]:
import os
import torch
import optuna
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from gym_vectorvelocity import VectorVelocityEnv
from stable_baselines3.common.env_checker import check_env
from gymnasium import make

2024-07-25 15:15:30.935836: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-25 15:15:30.935989: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-25 15:15:31.062558: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


### Setup

In [3]:
# Prefer the GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [4]:
# os.cpu_count() returns None when the CPU count cannot be determined; fall back
# to 1 so n_cores is always a positive integer that can be passed on as a job count.
n_cores = os.cpu_count() or 1
print(f"Using {n_cores} cores for training.")

Using 4 cores for training.


### Initialize environment

In [5]:
# Reward-shaping values; create_env() assigns each one to an attribute of the
# environment instance it builds.
# NOTE(review): how the environment interprets these (e.g. whether penalties are
# subtracted) is defined inside VectorVelocityEnv — confirm there.
GAMEOVER_PENALTY = 75
MISSED_COIN_PENALTY = 3

DODGED_OBSTACLE_REWARD = 1
COLLECTED_COIN_REWARD = 12

def create_env():
    """Return a new ``VectorVelocityEnv`` with this notebook's reward settings applied.

    The environment is constructed with its default arguments, then four
    attributes are overwritten from the module-level constants:
    ``coin_missed_penalty``, ``game_over_penalty``, ``dodged_obstacle_reward``
    and ``coin_reward``.

    NOTE(review): this assumes the environment reads these attributes at step
    time rather than caching them during construction — confirm against the
    ``VectorVelocityEnv`` implementation.
    """
    configured_env = VectorVelocityEnv()
    # Attribute name -> value, applied in the listed order.
    reward_overrides = (
        ("coin_missed_penalty", MISSED_COIN_PENALTY),
        ("game_over_penalty", GAMEOVER_PENALTY),
        ("dodged_obstacle_reward", DODGED_OBSTACLE_REWARD),
        ("coin_reward", COLLECTED_COIN_REWARD),
    )
    for attribute_name, value in reward_overrides:
        setattr(configured_env, attribute_name, value)
    return configured_env

# Build one configured environment and pass it through Stable-Baselines3's
# check_env before any training is attempted.
env = create_env()
check_env(env)

In [6]:
# Build the registered environment once and wrap it in a single-env DummyVecEnv.
# A separate name is used for the raw environment so the factory below does not
# depend on `env` being rebound afterwards.
# NOTE(review): this goes through gymnasium's registry, not create_env(), so the
# reward overrides that create_env() assigns are not applied here — confirm intent.
base_env = make('VectorVelocity-v0')
env = DummyVecEnv([lambda: base_env])

### Define trial

In [7]:
def objective(trial, total_timesteps=100000, n_eval_episodes=20):
    """Optuna objective: train a DQN agent with sampled hyperparameters and score it.

    Each call builds its own environment. The study runs this function with
    ``n_jobs > 1``, i.e. several trials execute at the same time; if they all
    stepped and reset one shared environment instance they would corrupt each
    other's episodes and the reported rewards would be meaningless.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.
        total_timesteps: Number of environment steps to train for.
        n_eval_episodes: Number of episodes used for the final evaluation.

    Returns:
        The mean episode reward over ``n_eval_episodes`` deterministic
        evaluation episodes (the study maximizes this value).
    """
    # Suggest hyperparameters
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    gamma = trial.suggest_float('gamma', 0.9, 0.9999)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
    buffer_size = trial.suggest_int('buffer_size', 10000, 100000)
    exploration_fraction = trial.suggest_float('exploration_fraction', 0.1, 0.5)
    exploration_final_eps = trial.suggest_float('exploration_final_eps', 0.01, 0.1)

    # Private environment for this trial only (never shared between concurrent trials).
    trial_env = DummyVecEnv([lambda: make('VectorVelocity-v0')])
    try:
        # Create the model
        model = DQN(
            "MultiInputPolicy",
            trial_env,
            learning_rate=learning_rate,
            gamma=gamma,
            batch_size=batch_size,
            buffer_size=buffer_size,
            exploration_fraction=exploration_fraction,
            exploration_final_eps=exploration_final_eps,
            verbose=1
        )

        # Train the model
        model.learn(total_timesteps=total_timesteps)

        # Evaluate the model
        mean_reward, _ = evaluate_policy(
            model,
            trial_env,
            n_eval_episodes=n_eval_episodes,
            deterministic=True,
        )
    finally:
        # Release the environment's resources even if training or evaluation fails.
        trial_env.close()

    return float(mean_reward)

### Start optimization

In [8]:
# Create the Optuna study backed by a local SQLite file. load_if_exists=True lets
# this cell be re-run (or the kernel restarted) without failing because the named
# study is already stored; a re-run resumes the study and adds further trials.
storage_url = "sqlite:///hyperparameter_tuning_data.db"
study = optuna.create_study(
    storage=storage_url,
    direction='maximize',
    study_name='dqn_hyperparameter_tuning',
    load_if_exists=True,
)

# Run the optimization with several trials in flight at once. Optuna's n_jobs
# runs trials in threads inside this process, not in separate processes.
study.optimize(objective, n_trials=50, n_jobs=n_cores)

# Print the best hyperparameters
print("Best hyperparameters: ", study.best_params)

[I 2024-07-25 15:15:43,856] A new study created in RDB with name: dqn_hyperparameter_tuning


Using cuda device
Using cuda device
Using cuda device
Using cuda device


  logger.deprecation(


----------------------------------
| rollout/            |          |
|    exploration_rate | 0.782    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 80       |
|    total_timesteps  | 4961     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.694    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 119      |
|    total_timesteps  | 7384     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.672    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 162      |
|    total_timesteps  | 10034    |
----------------------------------
----------------------------------
| rollout/          

[I 2024-07-25 16:15:22,173] Trial 1 finished with value: 226.9585200035903 and parameters: {'learning_rate': 0.00036489688346954364, 'gamma': 0.9054250802048465, 'batch_size': 256, 'buffer_size': 86485, 'exploration_fraction': 0.29763301813026377, 'exploration_final_eps': 0.026544085130239398}. Best is trial 1 with value: 226.9585200035903.


Using cuda device


[I 2024-07-25 16:15:28,162] Trial 2 finished with value: 229.9487512079113 and parameters: {'learning_rate': 0.0005402517507189158, 'gamma': 0.9970368887001033, 'batch_size': 128, 'buffer_size': 94288, 'exploration_fraction': 0.21105863655073628, 'exploration_final_eps': 0.033120411044446955}. Best is trial 2 with value: 229.9487512079113.


Using cuda device


[I 2024-07-25 16:15:48,033] Trial 3 finished with value: 234.11155908532865 and parameters: {'learning_rate': 4.225700481609194e-05, 'gamma': 0.9187010903308369, 'batch_size': 256, 'buffer_size': 27156, 'exploration_fraction': 0.23879871227142144, 'exploration_final_eps': 0.011576892784178689}. Best is trial 3 with value: 234.11155908532865.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.827    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 71       |
|    total_timesteps  | 4393     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.862    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 104      |
|    total_timesteps  | 6395     |
----------------------------------


[I 2024-07-25 16:17:25,488] Trial 0 finished with value: 242.18506515581117 and parameters: {'learning_rate': 2.280273827386646e-05, 'gamma': 0.9592251881094116, 'batch_size': 256, 'buffer_size': 14782, 'exploration_fraction': 0.22146868394466548, 'exploration_final_eps': 0.02694855175262556}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.775    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 103      |
|    total_timesteps  | 6346     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.772    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 61       |
|    time_elapsed     | 171      |
|    total_timesteps  | 10540    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.87     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 115      |
|    total_timesteps  | 7099     |
----------------------------------
----------------------------------
| 

[I 2024-07-25 17:02:20,740] Trial 6 finished with value: 39.66398046936147 and parameters: {'learning_rate': 2.5050748686797006e-05, 'gamma': 0.9237230169044894, 'batch_size': 256, 'buffer_size': 63799, 'exploration_fraction': 0.25709925250774057, 'exploration_final_eps': 0.086671479407916}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.877    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 53       |
|    total_timesteps  | 3259     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.539    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 60       |
|    time_elapsed     | 201      |
|    total_timesteps  | 12224    |
----------------------------------


[I 2024-07-25 17:06:22,364] Trial 5 finished with value: 90.30453620860152 and parameters: {'learning_rate': 1.6258591496555155e-05, 'gamma': 0.9690571885320289, 'batch_size': 64, 'buffer_size': 89661, 'exploration_fraction': 0.2364510237722216, 'exploration_final_eps': 0.06616102786300296}. Best is trial 0 with value: 242.18506515581117.


Using cuda device


[I 2024-07-25 17:06:58,717] Trial 7 finished with value: 70.61195250536619 and parameters: {'learning_rate': 1.8306673376985913e-05, 'gamma': 0.9232365855407068, 'batch_size': 128, 'buffer_size': 61488, 'exploration_fraction': 0.49116521650761014, 'exploration_final_eps': 0.09948455893552867}. Best is trial 0 with value: 242.18506515581117.
[I 2024-07-25 17:06:58,719] Trial 4 finished with value: 92.91457004087854 and parameters: {'learning_rate': 0.00020834290868783278, 'gamma': 0.9015164825024337, 'batch_size': 64, 'buffer_size': 60456, 'exploration_fraction': 0.4500570934298066, 'exploration_final_eps': 0.02801289991611739}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.292    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 60       |
|    time_elapsed     | 307      |
|    total_timesteps  | 18766    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.869    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 100      |
|    total_timesteps  | 6218     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.862    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 76       |
|    total_timesteps  | 4689     |
----------------------------------
-------------------

[I 2024-07-25 17:49:22,358] Trial 8 finished with value: 64.25610115136432 and parameters: {'learning_rate': 0.0008455669779987698, 'gamma': 0.9864957901590551, 'batch_size': 32, 'buffer_size': 26642, 'exploration_fraction': 0.2547082841663201, 'exploration_final_eps': 0.038610367669752114}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.827    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 106      |
|    total_timesteps  | 6456     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.685    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 60       |
|    time_elapsed     | 193      |
|    total_timesteps  | 11792    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.636    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 60       |
|    time_elapsed     | 223      |
|    total_timesteps  | 13592    |
----------------------------------
----------------------------------
| 

[I 2024-07-25 17:57:37,585] Trial 9 finished with value: 129.48772610648967 and parameters: {'learning_rate': 1.6871450895972403e-05, 'gamma': 0.9273962427527243, 'batch_size': 64, 'buffer_size': 90716, 'exploration_fraction': 0.4394288646058583, 'exploration_final_eps': 0.07566208484144972}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.165    |
| time/               |          |
|    episodes         | 20       |
|    fps              | 60       |
|    time_elapsed     | 513      |
|    total_timesteps  | 31194    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.284    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 123      |
|    total_timesteps  | 7571     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0346   |
| time/               |          |
|    episodes         | 24       |
|    fps              | 60       |
|    time_elapsed     | 669      |
|    total_timesteps  | 40822    |
----------------------------------


[I 2024-07-25 18:01:39,850] Trial 11 finished with value: 182.94432174436602 and parameters: {'learning_rate': 7.831321805610158e-05, 'gamma': 0.9860124284072217, 'batch_size': 128, 'buffer_size': 34095, 'exploration_fraction': 0.33521847289153517, 'exploration_final_eps': 0.01209345882965921}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0346   |
| time/               |          |
|    episodes         | 28       |
|    fps              | 61       |
|    time_elapsed     | 740      |
|    total_timesteps  | 45173    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0532   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 61       |
|    time_elapsed     | 332      |
|    total_timesteps  | 20496    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0346   |
| time/               |          |
|    episodes         | 32       |
|    fps              | 60       |
|    time_elapsed     | 882      |
|    total_timesteps  | 53220    |
| train/              |          |
|    learning_rate    | 8.65e-05 |
| 

[I 2024-07-25 18:04:20,187] Trial 10 finished with value: 223.7279255804836 and parameters: {'learning_rate': 7.317453039464555e-05, 'gamma': 0.9873795780068249, 'batch_size': 64, 'buffer_size': 31301, 'exploration_fraction': 0.3356309179259903, 'exploration_final_eps': 0.03514024415943933}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0214   |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 202      |
|    total_timesteps  | 12479    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0346   |
| time/               |          |
|    episodes         | 36       |
|    fps              | 59       |
|    time_elapsed     | 958      |
|    total_timesteps  | 57218    |
| train/              |          |
|    learning_rate    | 8.65e-05 |
|    loss             | 0.000329 |
|    n_updates        | 1804     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.346    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
| 

[I 2024-07-25 18:30:40,292] Trial 12 finished with value: 87.01610081815393 and parameters: {'learning_rate': 8.646069285605973e-05, 'gamma': 0.9345078857785222, 'batch_size': 128, 'buffer_size': 39252, 'exploration_fraction': 0.36081633436293803, 'exploration_final_eps': 0.0346452302072676}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0104   |
| time/               |          |
|    episodes         | 44       |
|    fps              | 45       |
|    time_elapsed     | 1789     |
|    total_timesteps  | 81678    |
| train/              |          |
|    learning_rate    | 6.02e-05 |
|    loss             | 0.000335 |
|    n_updates        | 7919     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.613    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 72       |
|    total_timesteps  | 4349     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0514   |
| time/               |          |
|    episodes         | 40       |
|    fps              | 45       |
| 

[I 2024-07-25 18:48:14,896] Trial 13 finished with value: 85.93445819377644 and parameters: {'learning_rate': 6.500658630119838e-05, 'gamma': 0.9522010117972182, 'batch_size': 32, 'buffer_size': 11270, 'exploration_fraction': 0.10017907258612725, 'exploration_final_eps': 0.053153216500602174}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0106   |
| time/               |          |
|    episodes         | 32       |
|    fps              | 54       |
|    time_elapsed     | 1152     |
|    total_timesteps  | 62304    |
| train/              |          |
|    learning_rate    | 3.75e-05 |
|    loss             | 0.000208 |
|    n_updates        | 3075     |
----------------------------------


[I 2024-07-25 18:49:52,771] Trial 14 finished with value: 26.374186537408015 and parameters: {'learning_rate': 6.0180225743732555e-05, 'gamma': 0.9504161775642644, 'batch_size': 256, 'buffer_size': 10217, 'exploration_fraction': 0.1261987829835933, 'exploration_final_eps': 0.010352240997448747}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.601    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 111      |
|    total_timesteps  | 6702     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.438    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 169      |
|    total_timesteps  | 10415    |
----------------------------------


[I 2024-07-25 18:53:01,210] Trial 15 finished with value: 34.15344174410784 and parameters: {'learning_rate': 4.445109369939815e-05, 'gamma': 0.9539300578983364, 'batch_size': 256, 'buffer_size': 15396, 'exploration_fraction': 0.11031896561058563, 'exploration_final_eps': 0.05136434097733282}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0108   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 61       |
|    time_elapsed     | 331      |
|    total_timesteps  | 20290    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0106   |
| time/               |          |
|    episodes         | 36       |
|    fps              | 52       |
|    time_elapsed     | 1406     |
|    total_timesteps  | 74365    |
| train/              |          |
|    learning_rate    | 3.75e-05 |
|    loss             | 0.000198 |
|    n_updates        | 6091     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.094    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 61       |
| 

[I 2024-07-25 19:12:38,550] Trial 16 finished with value: 16.224295134742682 and parameters: {'learning_rate': 3.7451716648904985e-05, 'gamma': 0.9584513266756749, 'batch_size': 256, 'buffer_size': 10974, 'exploration_fraction': 0.11106181747364183, 'exploration_final_eps': 0.010609914997163633}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0108   |
| time/               |          |
|    episodes         | 40       |
|    fps              | 49       |
|    time_elapsed     | 1466     |
|    total_timesteps  | 72928    |
| train/              |          |
|    learning_rate    | 3.97e-05 |
|    loss             | 0.00019  |
|    n_updates        | 5731     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0198   |
| time/               |          |
|    episodes         | 32       |
|    fps              | 51       |
|    time_elapsed     | 1213     |
|    total_timesteps  | 62110    |
| train/              |          |
|    learning_rate    | 3.13e-05 |
|    loss             | 0.000495 |
|    n_updates        | 3027     |
----------------------------------
----------------------------------
| rollout/            |          |
| 

[I 2024-07-25 19:29:44,980] Trial 17 finished with value: -36.45390426352011 and parameters: {'learning_rate': 3.9698997461299834e-05, 'gamma': 0.9527982175491877, 'batch_size': 256, 'buffer_size': 15139, 'exploration_fraction': 0.16617626371685207, 'exploration_final_eps': 0.010808727193549136}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.78     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 62       |
|    total_timesteps  | 3738     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0214   |
| time/               |          |
|    episodes         | 52       |
|    fps              | 53       |
|    time_elapsed     | 1159     |
|    total_timesteps  | 62091    |
| train/              |          |
|    learning_rate    | 1.04e-05 |
|    loss             | 0.293    |
|    n_updates        | 3022     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.343    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 60       |
| 

[I 2024-07-25 19:36:45,820] Trial 18 finished with value: 13.907729659543254 and parameters: {'learning_rate': 3.206356751281717e-05, 'gamma': 0.9646834600157058, 'batch_size': 256, 'buffer_size': 46488, 'exploration_fraction': 0.17619073965900037, 'exploration_final_eps': 0.04991370135885636}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.026    |
| time/               |          |
|    episodes         | 20       |
|    fps              | 60       |
|    time_elapsed     | 480      |
|    total_timesteps  | 29001    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.869    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 62       |
|    total_timesteps  | 3869     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0214   |
| time/               |          |
|    episodes         | 60       |
|    fps              | 51       |
|    time_elapsed     | 1526     |
|    total_timesteps  | 78993    |
| train/              |          |
|    learning_rate    | 1.04e-05 |
| 

[I 2024-07-25 19:44:04,039] Trial 19 finished with value: 63.450601896527075 and parameters: {'learning_rate': 3.130963315286875e-05, 'gamma': 0.965168775296215, 'batch_size': 256, 'buffer_size': 45529, 'exploration_fraction': 0.18325589730096808, 'exploration_final_eps': 0.01977458338743683}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.026    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 59       |
|    time_elapsed     | 886      |
|    total_timesteps  | 52736    |
| train/              |          |
|    learning_rate    | 1.1e-05  |
|    loss             | 0.00145  |
|    n_updates        | 683      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0206   |
| time/               |          |
|    episodes         | 20       |
|    fps              | 61       |
|    time_elapsed     | 531      |
|    total_timesteps  | 32604    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.593    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
| 

[I 2024-07-25 19:58:41,061] Trial 20 finished with value: 67.5510263829292 and parameters: {'learning_rate': 1.0433657272648385e-05, 'gamma': 0.9387389142840095, 'batch_size': 256, 'buffer_size': 46873, 'exploration_fraction': 0.188669328184047, 'exploration_final_eps': 0.02138935190164746}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.026    |
| time/               |          |
|    episodes         | 56       |
|    fps              | 50       |
|    time_elapsed     | 1821     |
|    total_timesteps  | 91975    |
| train/              |          |
|    learning_rate    | 1.1e-05  |
|    loss             | 0.293    |
|    n_updates        | 10493    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.723    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 103      |
|    total_timesteps  | 6259     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0213   |
| time/               |          |
|    episodes         | 36       |
|    fps              | 56       |
| 

[I 2024-07-25 20:19:39,772] Trial 21 finished with value: 150.2664963705141 and parameters: {'learning_rate': 1.1000987292093748e-05, 'gamma': 0.938654891537458, 'batch_size': 256, 'buffer_size': 45359, 'exploration_fraction': 0.16547532887216432, 'exploration_final_eps': 0.025953887255810403}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.679    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 122      |
|    total_timesteps  | 7378     |
----------------------------------


[I 2024-07-25 20:22:30,259] Trial 22 finished with value: 87.14406009653169 and parameters: {'learning_rate': 0.0001464794412946464, 'gamma': 0.9356869920606647, 'batch_size': 256, 'buffer_size': 23962, 'exploration_fraction': 0.2888157720835215, 'exploration_final_eps': 0.020637568706358946}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.545    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 60       |
|    time_elapsed     | 173      |
|    total_timesteps  | 10466    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0422   |
| time/               |          |
|    episodes         | 36       |
|    fps              | 50       |
|    time_elapsed     | 1455     |
|    total_timesteps  | 74083    |
| train/              |          |
|    learning_rate    | 0.00017  |
|    loss             | 0.000117 |
|    n_updates        | 6020     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.749    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
| 

[I 2024-07-25 20:26:48,156] Trial 23 finished with value: 30.57720603182263 and parameters: {'learning_rate': 0.0001561643180610399, 'gamma': 0.9427312651359719, 'batch_size': 32, 'buffer_size': 22949, 'exploration_fraction': 0.2912954288947327, 'exploration_final_eps': 0.02126370204039384}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.301    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 61       |
|    time_elapsed     | 263      |
|    total_timesteps  | 16227    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.171    |
| time/               |          |
|    episodes         | 16       |
|    fps              | 61       |
|    time_elapsed     | 313      |
|    total_timesteps  | 19252    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.748    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 94       |
|    total_timesteps  | 5854     |
----------------------------------
----------------------------------
| 

[I 2024-07-25 20:42:16,613] Trial 24 finished with value: 37.45765191396566 and parameters: {'learning_rate': 0.00017042616360578392, 'gamma': 0.9777662268175906, 'batch_size': 128, 'buffer_size': 76322, 'exploration_fraction': 0.2161868128686461, 'exploration_final_eps': 0.04221082007340822}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.043    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 57       |
|    time_elapsed     | 945      |
|    total_timesteps  | 54086    |
| train/              |          |
|    learning_rate    | 0.000889 |
|    loss             | 0.0001   |
|    n_updates        | 1021     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0462   |
| time/               |          |
|    episodes         | 36       |
|    fps              | 54       |
|    time_elapsed     | 1221     |
|    total_timesteps  | 66708    |
| train/              |          |
|    learning_rate    | 0.000501 |
|    loss             | 0.000363 |
|    n_updates        | 4176     |
----------------------------------
----------------------------------
| rollout/            |          |
| 

[I 2024-07-25 21:04:07,305] Trial 25 finished with value: 90.7552381860477 and parameters: {'learning_rate': 0.00013486010873325715, 'gamma': 0.9776829865615365, 'batch_size': 128, 'buffer_size': 74472, 'exploration_fraction': 0.22010233144837482, 'exploration_final_eps': 0.04348724075311041}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0465   |
| time/               |          |
|    episodes         | 36       |
|    fps              | 49       |
|    time_elapsed     | 1410     |
|    total_timesteps  | 69936    |
| train/              |          |
|    learning_rate    | 0.00064  |
|    loss             | 0.582    |
|    n_updates        | 4983     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.718    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 59       |
|    time_elapsed     | 128      |
|    total_timesteps  | 7695     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.359    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 59       |
| 

[I 2024-07-25 21:09:12,525] Trial 26 finished with value: 86.38486774669161 and parameters: {'learning_rate': 0.0005012174688973058, 'gamma': 0.9760833772513278, 'batch_size': 128, 'buffer_size': 74316, 'exploration_fraction': 0.22139367059789006, 'exploration_final_eps': 0.04615756973104425}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.19     |
| time/               |          |
|    episodes         | 12       |
|    fps              | 60       |
|    time_elapsed     | 368      |
|    total_timesteps  | 22118    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0465   |
| time/               |          |
|    episodes         | 40       |
|    fps              | 49       |
|    time_elapsed     | 1681     |
|    total_timesteps  | 82755    |
| train/              |          |
|    learning_rate    | 0.00064  |
|    loss             | 0.00025  |
|    n_updates        | 8188     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0465   |
| time/               |          |
|    episodes         | 44       |
|    fps              | 49       |
| 

[I 2024-07-25 21:17:49,839] Trial 27 finished with value: 93.27399634309513 and parameters: {'learning_rate': 0.0008890657796523628, 'gamma': 0.9952944706181376, 'batch_size': 128, 'buffer_size': 77874, 'exploration_fraction': 0.22251996529476376, 'exploration_final_eps': 0.04302833697654669}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0328   |
| time/               |          |
|    episodes         | 28       |
|    fps              | 60       |
|    time_elapsed     | 836      |
|    total_timesteps  | 50672    |
| train/              |          |
|    learning_rate    | 0.000549 |
|    loss             | 0.000467 |
|    n_updates        | 167      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.793    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 94       |
|    total_timesteps  | 5818     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0328   |
| time/               |          |
|    episodes         | 32       |
|    fps              | 59       |
| 

[I 2024-07-25 21:30:53,032] Trial 28 finished with value: 121.77542393501236 and parameters: {'learning_rate': 0.0006395670171442798, 'gamma': 0.9112883066408319, 'batch_size': 128, 'buffer_size': 72581, 'exploration_fraction': 0.21489006103537053, 'exploration_final_eps': 0.046480963009332715}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0594   |
| time/               |          |
|    episodes         | 28       |
|    fps              | 60       |
|    time_elapsed     | 796      |
|    total_timesteps  | 48215    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0594   |
| time/               |          |
|    episodes         | 32       |
|    fps              | 59       |
|    time_elapsed     | 851      |
|    total_timesteps  | 51077    |
| train/              |          |
|    learning_rate    | 0.000302 |
|    loss             | 0.00227  |
|    n_updates        | 269      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0328   |
| time/               |          |
|    episodes         | 48       |
|    fps              | 53       |
| 

[I 2024-07-25 21:51:31,799] Trial 29 finished with value: 119.9617119059867 and parameters: {'learning_rate': 0.0005485683920175276, 'gamma': 0.9137111697746316, 'batch_size': 128, 'buffer_size': 20557, 'exploration_fraction': 0.26427115124988676, 'exploration_final_eps': 0.0327507206917013}. Best is trial 0 with value: 242.18506515581117.


Using cuda device


[I 2024-07-25 21:52:55,734] Trial 30 finished with value: 67.32103013572116 and parameters: {'learning_rate': 0.0002954204044529329, 'gamma': 0.9957943430775542, 'batch_size': 128, 'buffer_size': 21252, 'exploration_fraction': 0.26600784939233174, 'exploration_final_eps': 0.0593801049686881}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.903    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 64       |
|    total_timesteps  | 3988     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.229    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 183      |
|    total_timesteps  | 11145    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0606   |
| time/               |          |
|    episodes         | 32       |
|    fps              | 51       |
|    time_elapsed     | 1472     |
|    total_timesteps  | 75600    |
| train/              |          |
|    learning_rate    | 0.000291 |
| 

[I 2024-07-25 22:04:43,356] Trial 31 finished with value: 92.75540754287407 and parameters: {'learning_rate': 0.0003015421111490731, 'gamma': 0.9145029458008068, 'batch_size': 32, 'buffer_size': 96603, 'exploration_fraction': 0.26484697435974736, 'exploration_final_eps': 0.059400027978510885}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.855    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 75       |
|    total_timesteps  | 4637     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0288   |
| time/               |          |
|    episodes         | 28       |
|    fps              | 61       |
|    time_elapsed     | 798      |
|    total_timesteps  | 49125    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0652   |
| time/               |          |
|    episodes         | 20       |
|    fps              | 60       |
|    time_elapsed     | 914      |
|    total_timesteps  | 55071    |
| train/              |          |
|    learning_rate    | 0.000289 |
| 

[I 2024-07-25 22:14:37,000] Trial 32 finished with value: 52.61581471840022 and parameters: {'learning_rate': 0.00029057328452191566, 'gamma': 0.9982528080001367, 'batch_size': 32, 'buffer_size': 98802, 'exploration_fraction': 0.14840413775025388, 'exploration_final_eps': 0.06063752456072904}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.878    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 65       |
|    total_timesteps  | 4032     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0288   |
| time/               |          |
|    episodes         | 44       |
|    fps              | 53       |
|    time_elapsed     | 1400     |
|    total_timesteps  | 75040    |
| train/              |          |
|    learning_rate    | 0.000305 |
|    loss             | 3.7e-05  |
|    n_updates        | 6259     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0284   |
| time/               |          |
|    episodes         | 32       |
|    fps              | 60       |
| 

[I 2024-07-25 22:33:53,601] Trial 33 finished with value: 21.863514688869294 and parameters: {'learning_rate': 0.0002887389435340081, 'gamma': 0.9968936943054352, 'batch_size': 32, 'buffer_size': 99065, 'exploration_fraction': 0.13508666533146452, 'exploration_final_eps': 0.06516670563437724}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.813    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 101      |
|    total_timesteps  | 6096     |
----------------------------------


[I 2024-07-25 22:36:03,294] Trial 34 finished with value: 28.377991601729235 and parameters: {'learning_rate': 0.0003047685132177702, 'gamma': 0.9040387397975141, 'batch_size': 256, 'buffer_size': 99497, 'exploration_fraction': 0.3986565907165144, 'exploration_final_eps': 0.028785702626817736}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.699    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 60       |
|    time_elapsed     | 162      |
|    total_timesteps  | 9816     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0284   |
| time/               |          |
|    episodes         | 52       |
|    fps              | 47       |
|    time_elapsed     | 1918     |
|    total_timesteps  | 91385    |
| train/              |          |
|    learning_rate    | 0.000359 |
|    loss             | 0.000256 |
|    n_updates        | 10346    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.876    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
| 

[I 2024-07-25 22:51:24,949] Trial 35 finished with value: 64.98752051778794 and parameters: {'learning_rate': 0.00035943608255151237, 'gamma': 0.9022548364355967, 'batch_size': 256, 'buffer_size': 86762, 'exploration_fraction': 0.31041165797932374, 'exploration_final_eps': 0.02837874734720182}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.842    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 59       |
|    time_elapsed     | 65       |
|    total_timesteps  | 3935     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0171   |
| time/               |          |
|    episodes         | 32       |
|    fps              | 55       |
|    time_elapsed     | 1021     |
|    total_timesteps  | 57033    |
| train/              |          |
|    learning_rate    | 2.36e-05 |
|    loss             | 0.000774 |
|    n_updates        | 1758     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0277   |
| time/               |          |
|    episodes         | 44       |
|    fps              | 54       |
| 

[I 2024-07-25 22:58:15,823] Trial 36 finished with value: 8.451775194089368 and parameters: {'learning_rate': 0.00042861666730499787, 'gamma': 0.9025373712183924, 'batch_size': 256, 'buffer_size': 82570, 'exploration_fraction': 0.32083378834357984, 'exploration_final_eps': 0.028259416949808644}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.833    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 68       |
|    total_timesteps  | 4216     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0277   |
| time/               |          |
|    episodes         | 52       |
|    fps              | 50       |
|    time_elapsed     | 1544     |
|    total_timesteps  | 78595    |
| train/              |          |
|    learning_rate    | 2.37e-05 |
|    loss             | 0.000151 |
|    n_updates        | 7148     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0171   |
| time/               |          |
|    episodes         | 44       |
|    fps              | 51       |
| 

[I 2024-07-25 23:19:48,034] Trial 38 finished with value: 22.138913695046313 and parameters: {'learning_rate': 2.3628195458182844e-05, 'gamma': 0.9001372966317691, 'batch_size': 256, 'buffer_size': 84140, 'exploration_fraction': 0.3182206234615177, 'exploration_final_eps': 0.017075507090204455}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0167   |
| time/               |          |
|    episodes         | 60       |
|    fps              | 48       |
|    time_elapsed     | 1740     |
|    total_timesteps  | 83859    |
| train/              |          |
|    learning_rate    | 5.31e-05 |
|    loss             | 0.00138  |
|    n_updates        | 8464     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0167   |
| time/               |          |
|    episodes         | 64       |
|    fps              | 48       |
|    time_elapsed     | 1854     |
|    total_timesteps  | 89182    |
| train/              |          |
|    learning_rate    | 5.31e-05 |
|    loss             | 0.000377 |
|    n_updates        | 9795     |
----------------------------------
----------------------------------
| rollout/            |          |
| 

[I 2024-07-25 23:26:27,831] Trial 37 finished with value: 115.07875215612482 and parameters: {'learning_rate': 2.3711583769310958e-05, 'gamma': 0.9058738813162086, 'batch_size': 256, 'buffer_size': 86552, 'exploration_fraction': 0.3166331971164114, 'exploration_final_eps': 0.027650698366778605}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.74     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 104      |
|    total_timesteps  | 6411     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0161   |
| time/               |          |
|    episodes         | 52       |
|    fps              | 49       |
|    time_elapsed     | 1899     |
|    total_timesteps  | 94625    |
| train/              |          |
|    learning_rate    | 2.2e-05  |
|    loss             | 0.000183 |
|    n_updates        | 11156    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0155   |
| time/               |          |
|    episodes         | 12       |
|    fps              | 60       |
| 

[I 2024-07-25 23:40:19,913] Trial 39 finished with value: 152.46679472063232 and parameters: {'learning_rate': 5.3056127158049635e-05, 'gamma': 0.9211350602158992, 'batch_size': 64, 'buffer_size': 67239, 'exploration_fraction': 0.2442245620011763, 'exploration_final_eps': 0.016655565478034703}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0155   |
| time/               |          |
|    episodes         | 24       |
|    fps              | 55       |
|    time_elapsed     | 1248     |
|    total_timesteps  | 69897    |
| train/              |          |
|    learning_rate    | 0.000107 |
|    loss             | 1.17     |
|    n_updates        | 4974     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.917    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 56       |
|    total_timesteps  | 3423     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0168   |
| time/               |          |
|    episodes         | 28       |
|    fps              | 58       |
| 

[I 2024-07-25 23:43:51,206] Trial 40 finished with value: 98.09221359664596 and parameters: {'learning_rate': 2.2011147395534007e-05, 'gamma': 0.921083159530109, 'batch_size': 64, 'buffer_size': 68040, 'exploration_fraction': 0.24788598039936388, 'exploration_final_eps': 0.016138021182733362}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.611    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 60       |
|    time_elapsed     | 264      |
|    total_timesteps  | 15983    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.778    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 61       |
|    time_elapsed     | 135      |
|    total_timesteps  | 8339     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.429    |
| time/               |          |
|    episodes         | 16       |
|    fps              | 60       |
|    time_elapsed     | 386      |
|    total_timesteps  | 23439    |
----------------------------------
----------------------------------
| 

[I 2024-07-26 00:01:59,406] Trial 41 finished with value: 33.104507549075194 and parameters: {'learning_rate': 0.00010725535409170478, 'gamma': 0.9211432714087344, 'batch_size': 64, 'buffer_size': 66716, 'exploration_fraction': 0.24540243827229108, 'exploration_final_eps': 0.015465583428610813}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0987   |
| time/               |          |
|    episodes         | 48       |
|    fps              | 52       |
|    time_elapsed     | 1373     |
|    total_timesteps  | 72544    |
| train/              |          |
|    learning_rate    | 1.41e-05 |
|    loss             | 0.000747 |
|    n_updates        | 5635     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.729    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 162      |
|    total_timesteps  | 9819     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0352   |
| time/               |          |
|    episodes         | 40       |
|    fps              | 53       |
| 

[I 2024-07-26 00:06:33,027] Trial 42 finished with value: 24.114355561279616 and parameters: {'learning_rate': 0.0006697313223068908, 'gamma': 0.9207999895671013, 'batch_size': 64, 'buffer_size': 59108, 'exploration_fraction': 0.24246088517783826, 'exploration_final_eps': 0.016750520733305085}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0352   |
| time/               |          |
|    episodes         | 44       |
|    fps              | 52       |
|    time_elapsed     | 1367     |
|    total_timesteps  | 72428    |
| train/              |          |
|    learning_rate    | 8.4e-05  |
|    loss             | 0.000129 |
|    n_updates        | 5606     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0352   |
| time/               |          |
|    episodes         | 48       |
|    fps              | 52       |
|    time_elapsed     | 1385     |
|    total_timesteps  | 73390    |
| train/              |          |
|    learning_rate    | 8.4e-05  |
|    loss             | 0.00012  |
|    n_updates        | 5847     |
----------------------------------
----------------------------------
| rollout/            |          |
| 

[I 2024-07-26 00:22:28,985] Trial 43 finished with value: 28.70900546944881 and parameters: {'learning_rate': 1.4120866714652663e-05, 'gamma': 0.9100618242012462, 'batch_size': 64, 'buffer_size': 29967, 'exploration_fraction': 0.3702046761806791, 'exploration_final_eps': 0.09868803649032315}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0362   |
| time/               |          |
|    episodes         | 36       |
|    fps              | 54       |
|    time_elapsed     | 1269     |
|    total_timesteps  | 69507    |
| train/              |          |
|    learning_rate    | 0.000223 |
|    loss             | 0.00016  |
|    n_updates        | 4876     |
----------------------------------


[I 2024-07-26 00:23:15,045] Trial 44 finished with value: -4.99273614093097 and parameters: {'learning_rate': 8.404400812821048e-05, 'gamma': 0.9846366993112329, 'batch_size': 64, 'buffer_size': 30361, 'exploration_fraction': 0.3629041378696677, 'exploration_final_eps': 0.03515119628513681}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.806    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 67       |
|    total_timesteps  | 4075     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0351   |
| time/               |          |
|    episodes         | 40       |
|    fps              | 56       |
|    time_elapsed     | 1031     |
|    total_timesteps  | 58221    |
| train/              |          |
|    learning_rate    | 0.000224 |
|    loss             | 2.11e-05 |
|    n_updates        | 2055     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0362   |
| time/               |          |
|    episodes         | 40       |
|    fps              | 54       |
| 

[I 2024-07-26 00:41:42,669] Trial 45 finished with value: 9.19595400003406 and parameters: {'learning_rate': 0.00022299672638884666, 'gamma': 0.9844203324331553, 'batch_size': 64, 'buffer_size': 30915, 'exploration_fraction': 0.3492003114529812, 'exploration_final_eps': 0.03624530484689031}. Best is trial 0 with value: 242.18506515581117.


Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0352   |
| time/               |          |
|    episodes         | 32       |
|    fps              | 53       |
|    time_elapsed     | 1116     |
|    total_timesteps  | 59697    |
| train/              |          |
|    learning_rate    | 0.000214 |
|    loss             | 4.87e-05 |
|    n_updates        | 2424     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.879    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 82       |
|    total_timesteps  | 4940     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0352   |
| time/               |          |
|    episodes         | 36       |
|    fps              | 52       |
| 

[I 2024-07-26 00:47:29,371] Trial 46 finished with value: 3.3647842562246297 and parameters: {'learning_rate': 0.00022351588731394254, 'gamma': 0.988938539736984, 'batch_size': 64, 'buffer_size': 30278, 'exploration_fraction': 0.378493130022841, 'exploration_final_eps': 0.03509405675536626}. Best is trial 0 with value: 242.18506515581117.


----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0352   |
| time/               |          |
|    episodes         | 44       |
|    fps              | 50       |
|    time_elapsed     | 1464     |
|    total_timesteps  | 74386    |
| train/              |          |
|    learning_rate    | 0.000214 |
|    loss             | 0.000114 |
|    n_updates        | 6096     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.463    |
| time/               |          |
|    episodes         | 16       |
|    fps              | 60       |
|    time_elapsed     | 365      |
|    total_timesteps  | 21982    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.391    |
| time/               |          |
|    episodes         | 20       |
|    fps              | 60       |
|    time_elapsed   

[I 2024-07-26 01:01:09,112] Trial 47 finished with value: -15.348235164686741 and parameters: {'learning_rate': 8.107666053852307e-05, 'gamma': 0.9903127309724797, 'batch_size': 64, 'buffer_size': 36858, 'exploration_fraction': 0.2018312867544572, 'exploration_final_eps': 0.03732505423551513}. Best is trial 0 with value: 242.18506515581117.


----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0321   |
| time/               |          |
|    episodes         | 56       |
|    fps              | 55       |
|    time_elapsed     | 1312     |
|    total_timesteps  | 73365    |
| train/              |          |
|    learning_rate    | 4.85e-05 |
|    loss             | 0.00018  |
|    n_updates        | 5841     |
----------------------------------


[I 2024-07-26 01:04:09,548] Trial 48 finished with value: 17.62813511302362 and parameters: {'learning_rate': 0.0002141343455180411, 'gamma': 0.9895873900542063, 'batch_size': 256, 'buffer_size': 37735, 'exploration_fraction': 0.34182136669889296, 'exploration_final_eps': 0.03524029754122275}. Best is trial 0 with value: 242.18506515581117.


----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0321   |
| time/               |          |
|    episodes         | 60       |
|    fps              | 55       |
|    time_elapsed     | 1365     |
|    total_timesteps  | 76152    |
| train/              |          |
|    learning_rate    | 4.85e-05 |
|    loss             | 0.000127 |
|    n_updates        | 6537     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0321   |
| time/               |          |
|    episodes         | 64       |
|    fps              | 55       |
|    time_elapsed     | 1400     |
|    total_timesteps  | 78229    |
| train/              |          |
|    learning_rate    | 4.85e-05 |
|    loss             | 0.292    |
|    n_updates        | 7057     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

[I 2024-07-26 01:13:41,942] Trial 49 finished with value: -51.375477691483866 and parameters: {'learning_rate': 4.8537620328904014e-05, 'gamma': 0.990323878603262, 'batch_size': 256, 'buffer_size': 35769, 'exploration_fraction': 0.3961688424759834, 'exploration_final_eps': 0.032097756518451315}. Best is trial 0 with value: 242.18506515581117.


Best hyperparameters:  {'learning_rate': 2.280273827386646e-05, 'gamma': 0.9592251881094116, 'batch_size': 256, 'buffer_size': 14782, 'exploration_fraction': 0.22146868394466548, 'exploration_final_eps': 0.02694855175262556}


### Run dashboard to inspect results

In [2]:
from optuna_dashboard import run_server
# SQLite database file (relative to the working directory) that the dashboard
# reads the tuning study from.
storage_url = "sqlite:///./hyperparameter_tuning_data.db"

# Serve the Optuna dashboard on the loopback interface only, port 8080.
# NOTE(review): the cell output ("Hit Ctrl-C to quit.") suggests this call
# blocks while the server is running, so `dashboard_proc` is presumably not a
# process handle — confirm against the optuna-dashboard documentation.
dashboard_proc = run_server(
    storage_url,
    port=8080,
    host="127.0.0.1",
)

Bottle v0.12.25 server starting up (using WSGIRefServer())...
Listening on http://127.0.0.1:8080/
Hit Ctrl-C to quit.

127.0.0.1 - - [26/Jul/2024 07:53:07] "GET /dashboard/?studies_order_by=desc HTTP/1.1" 200 4145
127.0.0.1 - - [26/Jul/2024 07:53:07] "GET /static/bundle.js HTTP/1.1" 200 2986981
127.0.0.1 - - [26/Jul/2024 07:53:08] "GET /api/studies HTTP/1.1" 200 153
  return get_param_importances(study, target=target, evaluator=PedAnovaImportanceEvaluator())
127.0.0.1 - - [26/Jul/2024 07:53:11] "GET /api/studies/1/param_importances HTTP/1.1" 200 625
127.0.0.1 - - [26/Jul/2024 07:53:11] "GET /api/studies/1?after=0 HTTP/1.1" 200 104732
127.0.0.1 - - [26/Jul/2024 07:53:11] "GET /api/meta HTTP/1.1" 200 64
127.0.0.1 - - [26/Jul/2024 07:53:12] "GET /api/studies/1/param_importances HTTP/1.1" 200 625
127.0.0.1 - - [26/Jul/2024 07:53:22] "GET /api/studies/1?after=50 HTTP/1.1" 200 4225
127.0.0.1 - - [26/Jul/2024 07:53:32] "GET /api/studies/1?after=50 HTTP/1.1" 200 4225
127.0.0.1 - - [26/Jul/2024