In [1]:
# Install the Flappy Bird Gymnasium fork (main-branch archive from GitHub) and Optuna.
# NOTE(review): neither install is version-pinned, so a re-run may pull different code.
# `!pip` runs whichever pip is first on PATH, the same lookup `!python run_trial.py` uses later.
!pip install -q https://github.com/PatrickKudo/flappy-bird-gymnasium/archive/refs/heads/main.zip
!pip install -q optuna

[2K     [32m/[0m [32m41.4 MB[0m [31m17.0 MB/s[0m [33m0:00:03[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for flappy-bird-gymnasium (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m31.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
%%writefile run_trial.py
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import display, clear_output

import math
import random
import time
from collections import namedtuple, deque
from itertools import count
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import pygame
import gymnasium
import flappy_bird_gymnasium
import torch
import torch.nn as nn
import torch.optim as optim
import gymnasium as gym
import argparse
from statistics import mean, stdev


class DuelingDQN(nn.Module):
    """Dueling DQN with parallel 1D-convolutional and dense feature extractors.

    A flat observation is pushed through two branches -- a two-layer ``Conv1d``
    stack and a single dense layer with dropout -- whose outputs are
    concatenated and fed to separate value and advantage streams.

    Args:
        input_channels: Channels expected by the first ``Conv1d``. ``forward``
            inserts exactly one channel dimension, so the network as written
            works with ``input_channels == 1``.
        input_length: Number of features per observation. Must be at least 4,
            because the two un-padded convolutions shorten the sequence by 3.
        action_space: Number of discrete actions (width of the advantage head).
        model_size: ``"small"``, ``"medium"`` or ``"large"``; selects the hidden
            widths of the convolutional, dense and stream layers.

    Raises:
        ValueError: If ``model_size`` is not a known preset or ``input_length``
            is too short for the convolutional branch.
    """

    # model_size -> (cnn_hidden, dense_hidden, stream_hidden)
    _SIZE_PRESETS = {
        "small": (8, 64, 64),
        "medium": (16, 128, 128),
        "large": (32, 256, 256),
    }
    # Output channels of the second convolution (independent of model_size).
    _CONV_OUT_CHANNELS = 16

    def __init__(self, input_channels, input_length, action_space, model_size):
        super(DuelingDQN, self).__init__()

        try:
            cnn_hidden, dense_hidden, stream_hidden = self._SIZE_PRESETS[model_size]
        except (KeyError, TypeError):
            raise ValueError("Invalid model size") from None

        # kernel_size=3 then kernel_size=2, stride 1, no padding:
        # the sequence length shrinks by 2 and then by 1.
        conv_out_length = input_length - 3
        if conv_out_length < 1:
            raise ValueError("input_length must be at least 4 for the convolutional branch")

        # 1D Convolutional feature extractor
        self.conv1d_features = nn.Sequential(
            nn.Conv1d(input_channels, cnn_hidden, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv1d(cnn_hidden, self._CONV_OUT_CHANNELS, kernel_size=2, stride=1),
            nn.ReLU(),
            nn.Flatten()
        )

        # Fully Connected feature extractor
        self.dense_features = nn.Sequential(
            nn.Linear(input_length * input_channels, dense_hidden),
            nn.ReLU(),
            nn.Dropout(0.1)
        )

        # Combined feature size, derived from the input length instead of being
        # hard-coded (16 * 9 = 144 for the 12-feature observation).
        combined_feature_size = self._CONV_OUT_CHANNELS * conv_out_length + dense_hidden

        # Value stream: one scalar V(s) per sample
        self.value_stream = nn.Sequential(
            nn.Linear(combined_feature_size, stream_hidden),
            nn.ReLU(),
            nn.Linear(stream_hidden, 1)
        )

        # Advantage stream: one A(s, a) per action
        self.advantage_stream = nn.Sequential(
            nn.Linear(combined_feature_size, stream_hidden),
            nn.ReLU(),
            nn.Linear(stream_hidden, action_space)
        )

    def forward(self, state):
        """Return Q-values of shape ``(batch, action_space)`` for a ``(batch, input_length)`` state."""
        # Insert the channel dimension for Conv1d: (batch, length) -> (batch, 1, length).
        conv_features = self.conv1d_features(state.unsqueeze(1))
        dense_features = self.dense_features(state.view(state.size(0), -1))
        combined_features = torch.cat((conv_features, dense_features), dim=1)

        value = self.value_stream(combined_features)
        advantages = self.advantage_stream(combined_features)
        # from the paper: Q(s, a; θ, α, β) = V (s; θ, β) + (A(s, a; θ, α) − mean(A(s, a'; θ, α))).
        qvals = value + (advantages - advantages.mean(dim=1, keepdim=True))
        return qvals


# Replay memory
class ReplayMemory:
    """Fixed-capacity FIFO buffer of transitions used for experience replay."""

    def __init__(self, capacity):
        # A bounded deque drops its oldest entry once `capacity` is reached.
        self.memory = deque(maxlen=capacity)

    def push(self, *args):
        """Wrap the positional fields in a ``Transition`` and append it to the buffer."""
        transition = Transition(*args)
        self.memory.append(transition)

    def sample(self, batch_size):
        """Return ``batch_size`` stored items drawn at random without replacement."""
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """Number of items currently held."""
        return len(self.memory)



# Initialize misc. game settings
# Select SDL's "dummy" video and audio drivers before pygame is initialised --
# presumably so the script can run without a display or sound device.
os.environ["SDL_VIDEODRIVER"] = "dummy"
os.environ["SDL_AUDIODRIVER"] = "dummy"
pygame.init()

# Establish Flappy Bird environment
# use_lidar=False selects the non-lidar observation; the network below is built for 12 features.
env = gymnasium.make(
    "FlappyBird-v0", audio_on=False, render_mode="rgb_array", use_lidar=False
)

# set up matplotlib display functionality
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    # NOTE(review): this rebinds `display` (imported as a function at the top of the
    # script) to the IPython.display module; neither is referenced again in the visible code.
    from IPython import display

# Turn on matplotlib interactive mode.
plt.ion()

# Check if GPU is available to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("cuda is available: ", torch.cuda.is_available())

# Single transition of environment: map state-action pairs to rewards
# next_state is stored as None when the episode terminated on that step.
Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))



# Initialize the parser
parser = argparse.ArgumentParser()

# Adding arguments (all required: every trial must state its full configuration)
parser.add_argument("--batch_size", type=int, help="The batch size for the model training", required=True)
parser.add_argument("--lr", type=float, help="Learning rate", required=True)
parser.add_argument("--gamma", type=float, help="Gamma value used in optimizations", required=True)
parser.add_argument("--tau", type=float, help="Target network update rate", required=True)
# Restricting the choices makes a mistyped size fail at parse time with a usage
# message instead of later, inside the DuelingDQN constructor.
parser.add_argument("--model_size", type=str, help="Model size", required=True,
                    choices=("small", "medium", "large"))

# Parse the arguments
args = parser.parse_args()

BATCH_SIZE = args.batch_size
GAMMA = args.gamma
LR = args.lr
TAU = args.tau
MODEL_SIZE = args.model_size

# Epsilon-greedy schedule used by select_action: epsilon decays exponentially
# from EPS_START towards EPS_END with a time constant of EPS_DECAY steps.
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 1000

print("Batch size:", BATCH_SIZE)
print("Learning rate:", LR)
print("Gamma:", GAMMA)
print("Tau:", TAU)
print("Model size:", MODEL_SIZE)

# Get number of actions from gym action space
n_actions = env.action_space.n
# Get the number of state observations
state, info = env.reset()
n_observations = len(state)

input_channels = 1
# Size the network from the observation the environment actually returns
# (12 features with use_lidar=False) rather than repeating the number here.
input_length = n_observations
policy_net = DuelingDQN(input_channels, input_length, n_actions, MODEL_SIZE).to(device)
target_net = DuelingDQN(input_channels, input_length, n_actions, MODEL_SIZE).to(device)
# Start the target network as an exact copy of the policy network.
target_net.load_state_dict(policy_net.state_dict())
# NOTE(review): neither network is switched to eval mode anywhere in this script, so the
# Dropout layer in DuelingDQN stays active during action selection and target evaluation.

# Define optimizer
optimizer = optim.AdamW(policy_net.parameters(), lr=LR, amsgrad=True)
# Set replay limit
memory = ReplayMemory(2000)

# Initialize step counter (drives the epsilon decay in select_action)
steps_done = 0

def select_action(state):
    """Choose an action for ``state`` with an exponentially decaying epsilon-greedy rule.

    Every call increments the global ``steps_done`` counter, which lowers the
    exploration probability from ``EPS_START`` towards ``EPS_END``.

    Returns:
        A long tensor viewed as ``(1, 1)`` holding the chosen action index:
        either the policy network's arg-max action or a random sample from
        the environment's action space.
    """
    global steps_done
    draw = random.random()
    decay = math.exp(-1. * steps_done / EPS_DECAY)
    eps_threshold = EPS_END + (EPS_START - EPS_END) * decay
    steps_done += 1

    if draw <= eps_threshold:
        # Explore: uniformly random action from the environment.
        return torch.tensor([[env.action_space.sample()]], device=device, dtype=torch.long)

    # Exploit: greedy action under the current policy network (no gradient needed).
    with torch.no_grad():
        return policy_net(state).max(1).indices.view(1, 1)

# Length in environment steps of every finished episode; filled by the training loop below.
episode_durations = []

def optimize_model():
    """Run one gradient step on ``policy_net`` using a random replay batch.

    Does nothing until the replay memory holds at least ``BATCH_SIZE``
    transitions. Otherwise it samples a batch, builds the one-step TD target
    ``reward + GAMMA * max_a Q_target(next_state, a)`` (with the bootstrap term
    left at zero for transitions whose ``next_state`` is ``None``), and
    minimises the Huber loss between that target and ``Q_policy(state, action)``.
    """
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose: list of Transition tuples -> one Transition of per-field tuples.
    batch = Transition(*zip(*transitions))

    # Transitions that did not end the episode, and a mask marking their positions.
    non_final_next_states = [s for s in batch.next_state if s is not None]
    non_final_mask = torch.tensor([s is not None for s in batch.next_state],
                                  device=device, dtype=torch.bool)

    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    # Q(s_t, a_t) for the actions that were actually taken.
    state_action_values = policy_net(state_batch).gather(1, action_batch)

    # max_a Q_target(s_{t+1}, a); stays zero where the episode terminated.
    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    # torch.cat raises on an empty list, so skip the target network entirely
    # when every sampled transition is terminal.
    if non_final_next_states:
        with torch.no_grad():
            next_state_values[non_final_mask] = target_net(
                torch.cat(non_final_next_states)).max(1).values
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    # Compute Huber loss
    criterion = nn.SmoothL1Loss()
    loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))

    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    # In-place gradient clipping
    torch.nn.utils.clip_grad_value_(policy_net.parameters(), 100)
    optimizer.step()


# Determine episodes based on GPU availability
# NOTE(review): the episode budget differs by hardware, so scores from a CPU run
# (50 episodes) are not comparable with scores from a GPU run (2000 episodes).
if torch.cuda.is_available():
    num_episodes = 2000
else:
    num_episodes = 50

# Start training loop
for i_episode in range(num_episodes):
    # Initialize the environment and get its state
    state, info = env.reset()
    # Add a leading batch dimension so the state can be fed to the network directly.
    state = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
    for t in count():
        action = select_action(state)
        observation, reward, terminated, truncated, _ = env.step(action.item())
        reward = torch.tensor([reward], device=device)
        done = terminated or truncated

        # Only a true termination stores None as the next state; a truncated
        # episode keeps its observation so optimize_model still bootstraps from it.
        if terminated:
            next_state = None
        else:
            next_state = torch.tensor(observation, dtype=torch.float32, device=device).unsqueeze(0)

        # Store the transition in memory
        memory.push(state, action, next_state, reward)

        # Move to the next state (None here is safe: the loop breaks below when done)
        state = next_state

        # Perform one step of the optimization (on the policy network)
        optimize_model()

        # Soft update of the target network's weights after every environment step:
        # target <- TAU * policy + (1 - TAU) * target
        target_net_state_dict = target_net.state_dict()
        policy_net_state_dict = policy_net.state_dict()
        for key in policy_net_state_dict:
            target_net_state_dict[key] = policy_net_state_dict[key]*TAU + target_net_state_dict[key]*(1-TAU)
        target_net.load_state_dict(target_net_state_dict)

        if done:
            # t is zero-based, so the episode lasted t + 1 steps.
            episode_durations.append(t + 1)
            break

# Summary statistics over all training episodes (early, exploration-heavy ones included).
print("std: ", stdev(episode_durations))
print("mean: ", mean(episode_durations))
print("maxL: ", max(episode_durations))

# Write the mean episode duration to a results file, overwriting any previous content.
with open('dqcnn_results.txt', 'w') as f:
    f.write(f"{mean(episode_durations)}")

Overwriting run_trial.py


In [2]:
import optuna
import gc
from statistics import mean, stdev


def objective(trial):
    # Define Hyperparameters
    lr = trial.suggest_float('lr', 1e-5, 1e-3, log=True)
    BS = trial.suggest_categorical('batch_size',
                                            [64, 128, 256])
    G = trial.suggest_float('gamma', 0.88, 0.99)
    T = trial.suggest_float('tau', 0.001, 0.01)

    # Run python script with parameters
    !python run_trial.py --lr $lr --batch_size $BS --gamma $G --tau $T

    # Clean up memory
    gc.collect()

    with open('dqcnn_results.txt', 'r') as f:
        mean_reward = float(f.read())
    print(f"Mean reward: {mean_reward}")
    return mean_reward


In [None]:
# Maximise the value returned by `objective` over 32 trials, run one after another.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=32)

# Report the best trial found and the hyperparameters that produced it.
print("Best trial:")
trial = study.best_trial
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

# Save every trial to CSV and copy the file to Google Drive.
# NOTE(review): the destination is a hard-coded Drive path and no drive mount is
# visible in this notebook -- confirm Drive is mounted before relying on the copy.
study.trials_dataframe().to_csv('dqcnn_tuna_results.csv')
!cp dqcnn_tuna_results.csv "/content/drive/MyDrive/MSAI/spr24/RL/"
# Visualize the objective value per trial; as the last expression, the figure is the cell output.
optuna.visualization.plot_optimization_history(study)

[I 2024-04-24 12:54:39,446] A new study created in memory with name: no-name-057fb724-8c23-4a37-aa58-d5583220ba1c


pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 128
Learning rate: 0.00017564655025175579
Gamma: 0.9681211331147841
Tau: 0.008001256929323113
std:  40.001301351392335
mean:  78.6815
maxL:  355


[I 2024-04-24 13:13:14,306] Trial 0 finished with value: 78.6815 and parameters: {'lr': 0.00017564655025175579, 'batch_size': 128, 'model_size': 'medium', 'gamma': 0.9681211331147841, 'tau': 0.008001256929323113}. Best is trial 0 with value: 78.6815.


Mean reward: 78.6815
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.0009121957338150469
Gamma: 0.9313249608727731
Tau: 0.009498521384071975
std:  41.33713316485729
mean:  81.849
maxL:  348


[I 2024-04-24 13:36:23,217] Trial 1 finished with value: 81.849 and parameters: {'lr': 0.0009121957338150469, 'batch_size': 256, 'model_size': 'large', 'gamma': 0.9313249608727731, 'tau': 0.009498521384071975}. Best is trial 1 with value: 81.849.


Mean reward: 81.849
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.0009664478893880014
Gamma: 0.9620442110986965
Tau: 0.006875530654026101
std:  50.590697606794976
mean:  86.455
maxL:  481


[I 2024-04-24 14:01:13,417] Trial 2 finished with value: 86.455 and parameters: {'lr': 0.0009664478893880014, 'batch_size': 256, 'model_size': 'medium', 'gamma': 0.9620442110986965, 'tau': 0.006875530654026101}. Best is trial 2 with value: 86.455.


Mean reward: 86.455
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 128
Learning rate: 4.485539183993879e-05
Gamma: 0.968966241687172
Tau: 0.0011717909415261264
std:  42.09339801950437
mean:  77.6295
maxL:  480


[I 2024-04-24 14:19:41,746] Trial 3 finished with value: 77.6295 and parameters: {'lr': 4.485539183993879e-05, 'batch_size': 128, 'model_size': 'large', 'gamma': 0.968966241687172, 'tau': 0.0011717909415261264}. Best is trial 2 with value: 86.455.


Mean reward: 77.6295
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 64
Learning rate: 0.000431745088455759
Gamma: 0.8826779188216232
Tau: 0.00714371950573442
std:  33.88063182123827
mean:  71.144
maxL:  401


[I 2024-04-24 14:34:07,927] Trial 4 finished with value: 71.144 and parameters: {'lr': 0.000431745088455759, 'batch_size': 64, 'model_size': 'small', 'gamma': 0.8826779188216232, 'tau': 0.00714371950573442}. Best is trial 2 with value: 86.455.


Mean reward: 71.144
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 128
Learning rate: 1.3554220579265984e-05
Gamma: 0.9599263660373278
Tau: 0.0030665569670349883
std:  22.534705994024385
mean:  65.2635
maxL:  213


[I 2024-04-24 14:49:25,009] Trial 5 finished with value: 65.2635 and parameters: {'lr': 1.3554220579265984e-05, 'batch_size': 128, 'model_size': 'medium', 'gamma': 0.9599263660373278, 'tau': 0.0030665569670349883}. Best is trial 2 with value: 86.455.


Mean reward: 65.2635
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 128
Learning rate: 0.00025295571947379165
Gamma: 0.9290090548357925
Tau: 0.0022845264311459317
std:  39.48505705789765
mean:  78.202
maxL:  425


[I 2024-04-24 15:07:52,495] Trial 6 finished with value: 78.202 and parameters: {'lr': 0.00025295571947379165, 'batch_size': 128, 'model_size': 'medium', 'gamma': 0.9290090548357925, 'tau': 0.0022845264311459317}. Best is trial 2 with value: 86.455.


Mean reward: 78.202
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 128
Learning rate: 0.0003361398955745435
Gamma: 0.9364861976143245
Tau: 0.0046934424239105125
std:  35.47344644168685
mean:  75.1195
maxL:  312


[I 2024-04-24 15:25:25,216] Trial 7 finished with value: 75.1195 and parameters: {'lr': 0.0003361398955745435, 'batch_size': 128, 'model_size': 'small', 'gamma': 0.9364861976143245, 'tau': 0.0046934424239105125}. Best is trial 2 with value: 86.455.


Mean reward: 75.1195
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.00027606132114150876
Gamma: 0.9342852469707638
Tau: 0.0017076207820913762
std:  43.76389331605552
mean:  80.909
maxL:  348


[I 2024-04-24 15:48:02,801] Trial 8 finished with value: 80.909 and parameters: {'lr': 0.00027606132114150876, 'batch_size': 256, 'model_size': 'small', 'gamma': 0.9342852469707638, 'tau': 0.0017076207820913762}. Best is trial 2 with value: 86.455.


Mean reward: 80.909
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 64
Learning rate: 0.0009137083722247889
Gamma: 0.9853925910030875
Tau: 0.0053983730471448416
std:  38.89076730910494
mean:  78.408
maxL:  461


[I 2024-04-24 16:04:28,713] Trial 9 finished with value: 78.408 and parameters: {'lr': 0.0009137083722247889, 'batch_size': 64, 'model_size': 'medium', 'gamma': 0.9853925910030875, 'tau': 0.0053983730471448416}. Best is trial 2 with value: 86.455.


Mean reward: 78.408
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 6.37634079827441e-05
Gamma: 0.8985735048561767
Tau: 0.006657921950470906
std:  32.90440245393351
mean:  72.843
maxL:  348


[I 2024-04-24 16:25:19,069] Trial 10 finished with value: 72.843 and parameters: {'lr': 6.37634079827441e-05, 'batch_size': 256, 'model_size': 'medium', 'gamma': 0.8985735048561767, 'tau': 0.006657921950470906}. Best is trial 2 with value: 86.455.


Mean reward: 72.843
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.0009668359683999224
Gamma: 0.9180678982581054
Tau: 0.00953932681720496
std:  46.51629158360913
mean:  85.15
maxL:  409


[I 2024-04-24 16:49:20,602] Trial 11 finished with value: 85.15 and parameters: {'lr': 0.0009668359683999224, 'batch_size': 256, 'model_size': 'large', 'gamma': 0.9180678982581054, 'tau': 0.00953932681720496}. Best is trial 2 with value: 86.455.


Mean reward: 85.15
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.0006321635222586329
Gamma: 0.9099268170898538
Tau: 0.009932078536855505
std:  42.412047898014215
mean:  80.328
maxL:  389


[I 2024-04-24 17:12:30,049] Trial 12 finished with value: 80.328 and parameters: {'lr': 0.0006321635222586329, 'batch_size': 256, 'model_size': 'large', 'gamma': 0.9099268170898538, 'tau': 0.009932078536855505}. Best is trial 2 with value: 86.455.


Mean reward: 80.328
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.00014170285332843215
Gamma: 0.9507622718263072
Tau: 0.008481712951401472
std:  48.09845845609739
mean:  84.855
maxL:  425


[I 2024-04-24 17:36:09,589] Trial 13 finished with value: 84.855 and parameters: {'lr': 0.00014170285332843215, 'batch_size': 256, 'model_size': 'large', 'gamma': 0.9507622718263072, 'tau': 0.008481712951401472}. Best is trial 2 with value: 86.455.


Mean reward: 84.855
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.0005512409477439518
Gamma: 0.9095555257909912
Tau: 0.0043728858607654025
std:  33.43319907517377
mean:  69.717
maxL:  359


[I 2024-04-24 17:55:54,501] Trial 14 finished with value: 69.717 and parameters: {'lr': 0.0005512409477439518, 'batch_size': 256, 'model_size': 'large', 'gamma': 0.9095555257909912, 'tau': 0.0043728858607654025}. Best is trial 2 with value: 86.455.


Mean reward: 69.717
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 1.1770770853568454e-05
Gamma: 0.9882022302035249
Tau: 0.0065387590258514
std:  17.81003527307608
mean:  59.0935
maxL:  199


[I 2024-04-24 18:13:42,629] Trial 15 finished with value: 59.0935 and parameters: {'lr': 1.1770770853568454e-05, 'batch_size': 256, 'model_size': 'medium', 'gamma': 0.9882022302035249, 'tau': 0.0065387590258514}. Best is trial 2 with value: 86.455.


Mean reward: 59.0935
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 64
Learning rate: 2.922048002525846e-05
Gamma: 0.949055990145032
Tau: 0.008542482093121286
std:  28.85426804464456
mean:  72.55
maxL:  312


[I 2024-04-24 18:29:13,126] Trial 16 finished with value: 72.55 and parameters: {'lr': 2.922048002525846e-05, 'batch_size': 64, 'model_size': 'large', 'gamma': 0.949055990145032, 'tau': 0.008542482093121286}. Best is trial 2 with value: 86.455.


Mean reward: 72.55
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 9.970874441597265e-05
Gamma: 0.9136743427986681
Tau: 0.007638979460063958
std:  37.638734661561905
mean:  76.6095
maxL:  360


[I 2024-04-24 18:51:45,136] Trial 17 finished with value: 76.6095 and parameters: {'lr': 9.970874441597265e-05, 'batch_size': 256, 'model_size': 'medium', 'gamma': 0.9136743427986681, 'tau': 0.007638979460063958}. Best is trial 2 with value: 86.455.


Mean reward: 76.6095
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.0007240286269111403
Gamma: 0.9209212374052363
Tau: 0.009135102769096666
std:  37.69043745484747
mean:  76.453
maxL:  502


[I 2024-04-24 19:14:18,557] Trial 18 finished with value: 76.453 and parameters: {'lr': 0.0007240286269111403, 'batch_size': 256, 'model_size': 'large', 'gamma': 0.9209212374052363, 'tau': 0.009135102769096666}. Best is trial 2 with value: 86.455.


Mean reward: 76.453
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.00045629103590286656
Gamma: 0.8890729570807044
Tau: 0.005773554422484147
std:  39.52367689775028
mean:  77.345
maxL:  463


[I 2024-04-24 19:36:29,558] Trial 19 finished with value: 77.345 and parameters: {'lr': 0.00045629103590286656, 'batch_size': 256, 'model_size': 'small', 'gamma': 0.8890729570807044, 'tau': 0.005773554422484147}. Best is trial 2 with value: 86.455.


Mean reward: 77.345
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 64
Learning rate: 0.000169044170684921
Gamma: 0.9498590770549784
Tau: 0.0034430715228084977
std:  41.70548725987735
mean:  79.8975
maxL:  425


[I 2024-04-24 19:53:02,305] Trial 20 finished with value: 79.8975 and parameters: {'lr': 0.000169044170684921, 'batch_size': 64, 'model_size': 'large', 'gamma': 0.9498590770549784, 'tau': 0.0034430715228084977}. Best is trial 2 with value: 86.455.


Mean reward: 79.8975
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.00012034936121760006
Gamma: 0.9484693184513387
Tau: 0.008697794528920383
std:  45.48153607852346
mean:  84.631
maxL:  425


[I 2024-04-24 20:16:52,523] Trial 21 finished with value: 84.631 and parameters: {'lr': 0.00012034936121760006, 'batch_size': 256, 'model_size': 'large', 'gamma': 0.9484693184513387, 'tau': 0.008697794528920383}. Best is trial 2 with value: 86.455.


Mean reward: 84.631
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.0009733095895360589
Gamma: 0.9748851506127703
Tau: 0.008045281642203514
std:  41.07748409530213
mean:  79.0805
maxL:  316


[I 2024-04-24 20:40:00,379] Trial 22 finished with value: 79.0805 and parameters: {'lr': 0.0009733095895360589, 'batch_size': 256, 'model_size': 'large', 'gamma': 0.9748851506127703, 'tau': 0.008045281642203514}. Best is trial 2 with value: 86.455.


Mean reward: 79.0805
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 2.4463129916161673e-05
Gamma: 0.9583473707450728
Tau: 0.00888311660586814
std:  36.369074758638
mean:  78.766
maxL:  360


[I 2024-04-24 21:03:44,499] Trial 23 finished with value: 78.766 and parameters: {'lr': 2.4463129916161673e-05, 'batch_size': 256, 'model_size': 'large', 'gamma': 0.9583473707450728, 'tau': 0.00888311660586814}. Best is trial 2 with value: 86.455.


Mean reward: 78.766
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 7.491907680009718e-05
Gamma: 0.9409390730645204
Tau: 0.009991543456066617
std:  42.93124071949071
mean:  79.546
maxL:  425


[I 2024-04-24 21:26:42,260] Trial 24 finished with value: 79.546 and parameters: {'lr': 7.491907680009718e-05, 'batch_size': 256, 'model_size': 'large', 'gamma': 0.9409390730645204, 'tau': 0.009991543456066617}. Best is trial 2 with value: 86.455.


Mean reward: 79.546
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.0003456910723302612
Gamma: 0.9238939357621286
Tau: 0.007382967337868394
std:  43.639482653018604
mean:  83.266
maxL:  359


[I 2024-04-24 21:49:49,741] Trial 25 finished with value: 83.266 and parameters: {'lr': 0.0003456910723302612, 'batch_size': 256, 'model_size': 'medium', 'gamma': 0.9238939357621286, 'tau': 0.007382967337868394}. Best is trial 2 with value: 86.455.


Mean reward: 83.266
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.00019348840972027096
Gamma: 0.9598669711491383
Tau: 0.0062861024013329565
std:  45.924376249623236
mean:  83.5345
maxL:  474


[I 2024-04-24 22:13:47,400] Trial 26 finished with value: 83.5345 and parameters: {'lr': 0.00019348840972027096, 'batch_size': 256, 'model_size': 'large', 'gamma': 0.9598669711491383, 'tau': 0.0062861024013329565}. Best is trial 2 with value: 86.455.


Mean reward: 83.5345
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.0006701803879215573
Gamma: 0.9781015955929993
Tau: 0.0081551630410963
std:  50.23917319444689
mean:  83.408
maxL:  514


[I 2024-04-24 22:37:17,244] Trial 27 finished with value: 83.408 and parameters: {'lr': 0.0006701803879215573, 'batch_size': 256, 'model_size': 'large', 'gamma': 0.9781015955929993, 'tau': 0.0081551630410963}. Best is trial 2 with value: 86.455.


Mean reward: 83.408
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.0004772709090796336
Gamma: 0.9448743797678584
Tau: 0.009312176229764513
std:  42.16133250656773
mean:  79.5695
maxL:  466


[I 2024-04-24 22:59:20,526] Trial 28 finished with value: 79.5695 and parameters: {'lr': 0.0004772709090796336, 'batch_size': 256, 'model_size': 'small', 'gamma': 0.9448743797678584, 'tau': 0.009312176229764513}. Best is trial 2 with value: 86.455.


Mean reward: 79.5695
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 64
Learning rate: 0.0001765912168805269
Gamma: 0.9667164701393973
Tau: 0.008139815162126472
std:  30.991005343584824
mean:  70.447
maxL:  348


[I 2024-04-24 23:13:56,418] Trial 29 finished with value: 70.447 and parameters: {'lr': 0.0001765912168805269, 'batch_size': 64, 'model_size': 'medium', 'gamma': 0.9667164701393973, 'tau': 0.008139815162126472}. Best is trial 2 with value: 86.455.


Mean reward: 70.447
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 128
Learning rate: 0.00011986859983406246
Gamma: 0.9544037142461727
Tau: 0.007397846635917882
std:  36.9021993539488
mean:  75.413
maxL:  464


[I 2024-04-24 23:31:47,788] Trial 30 finished with value: 75.413 and parameters: {'lr': 0.00011986859983406246, 'batch_size': 128, 'model_size': 'medium', 'gamma': 0.9544037142461727, 'tau': 0.007397846635917882}. Best is trial 2 with value: 86.455.


Mean reward: 75.413
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
cuda is available:  True
Batch size: 256
Learning rate: 0.00011912364845963011
Gamma: 0.9419522427480308
Tau: 0.009235307280301074
