In [4]:
import optuna

In [5]:
import flappy_bird_gymnasium
import gymnasium
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch import randint

In [6]:
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count
from PIL import Image
import time
import os
import model

cuda


In [7]:
def create_agent(env, BUFFER_SIZE=int(1e5), BATCH_SIZE=64, GAMMA=0.99, TAU=1e-3, UPDATE_EVERY=4,
                 LR=1e-4, SEED=0):
    """
    Build a DQN agent (local + target Q-networks and a replay buffer) for `env`.

    Params:
    - env: object exposing `observation_space.shape` and `action_space.n`;
      the first observation dimension and the action count size the networks
    - BUFFER_SIZE (int): maximum number of experiences kept in replay memory
    - BATCH_SIZE (int): number of experiences sampled per learning step
    - GAMMA (float): discount factor used in the Q-target
    - TAU (float): interpolation factor for the soft target-network update
    - UPDATE_EVERY (int): learn once every this many calls to `Agent.step`
    - LR (float): Adam learning rate for the local Q-network
    - SEED (int): seed passed to `random.seed` and `torch.manual_seed`

    Returns:
    - Agent: a freshly initialised agent bound to the hyperparameters above
    """
    # deque(maxlen=...) and random.sample(k=...) reject floats such as 1e5,
    # so normalise the sizes once here.
    BUFFER_SIZE = int(BUFFER_SIZE)
    BATCH_SIZE = int(BATCH_SIZE)

    # if gpu is to be used
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    class QNetwork(nn.Module):
        """
        Three-layer fully connected network mapping a state to one Q-value per action.
        """

        def __init__(self, state_size, action_size, seed, fc1_unit=128, fc2_unit=128):
            """
            Initialize parameters and build model.
            Params:
            - state_size (int): Dimension of each state
            - action_size (int): Dimension of each action
            - seed (int): Random seed
            - fc1_unit (int): Number of nodes in first hidden layer
            - fc2_unit (int): Number of nodes in second hidden layer
            """
            super().__init__()
            # torch.manual_seed reseeds the global torch RNG, so two networks
            # built with the same seed start from identical weights.
            self.seed = torch.manual_seed(seed)

            self.fc1 = nn.Linear(state_size, fc1_unit)
            self.fc2 = nn.Linear(fc1_unit, fc2_unit)
            self.fc3 = nn.Linear(fc2_unit, action_size)

        def forward(self, state):
            """
            Build a network that maps state -> action values.
            """
            x = F.relu(self.fc1(state))
            x = F.relu(self.fc2(x))
            return self.fc3(x)

    class Agent():
        """
        Interacts with and learns from the environment.
        """

        def __init__(self, state_size, action_size, seed):
            """
            Initialize an Agent object.
            Params:
            - state_size (int): dimension of each state
            - action_size (int): dimension of each action
            - seed (int): random seed
            """
            self.state_size = state_size
            self.action_size = action_size
            # random.seed() returns None, so seed first and keep the value itself.
            random.seed(seed)
            self.seed = seed

            # Q-Networks: `local` is trained, `target` tracks it via soft updates
            self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)

            self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

            # Replay Memory
            self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

            # Initialize time step (for updating every UPDATE_EVERY steps)
            self.t_step = 0

        def step(self, state, action, reward, next_step, done):
            """
            Store one transition and, every UPDATE_EVERY calls, learn from a sampled batch.
            """
            # Save experience in replay memory
            self.memory.add(state, action, reward, next_step, done)

            # Learn every UPDATE_EVERY time steps.
            self.t_step = (self.t_step + 1) % UPDATE_EVERY
            if self.t_step != 0:
                return

            # Only learn once more than one batch worth of samples is stored
            if len(self.memory) > BATCH_SIZE:
                experience = self.memory.sample()
                self.learn(experience, GAMMA)

        def act(self, state, eps=0):
            """
            Returns action for given state as per current policy.
            Params:
            - state (array_like): current state
            - eps (float): epsilon, for epsilon-greedy action selection
            """
            # np.asarray lets lists/tuples through; ndarrays are passed unchanged.
            state = torch.from_numpy(np.asarray(state)).float().unsqueeze(0).to(device)
            self.qnetwork_local.eval()
            with torch.no_grad():
                action_values = self.qnetwork_local(state)
            self.qnetwork_local.train()

            # Epsilon-greedy action selection
            if random.random() > eps:
                return np.argmax(action_values.cpu().numpy())
            return random.choice(np.arange(self.action_size))

        def learn(self, experiences, gamma):
            """
            Update value parameters using given batch of experience tuples.
            Params:
            - experiences (tuple of tensors): (states, actions, rewards, next_states, dones)
              as produced by ReplayBuffer.sample
            - gamma (float): discount factor
            """
            states, actions, rewards, next_states, dones = experiences

            # Max predicted Q value of each next state, from the target model.
            # no_grad keeps the target out of the autograd graph.
            with torch.no_grad():
                Q_targets_next = self.qnetwork_target(next_states).max(1)[0].unsqueeze(1)

            # Q targets for current states; (1 - dones) removes the bootstrap
            # term for transitions flagged as done
            Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))

            # Expected Q values of the actions actually taken, from the local model
            Q_expected = self.qnetwork_local(states).gather(1, actions)

            # Compute loss
            loss = F.mse_loss(Q_expected, Q_targets)

            # Minimize the loss
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # ------------------- update target network ------------------- #
            self.soft_update(self.qnetwork_local, self.qnetwork_target, TAU)

        def soft_update(self, local_model, target_model, tau):
            """
            Soft update model parameters.
            θ_target = τ*θ_local + (1 - τ)*θ_target

            Params:
            - local model (PyTorch model): weights will be copied from
            - target model (PyTorch model): weights will be copied to
            - tau (float): interpolation parameter
            """
            with torch.no_grad():
                for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
                    target_param.copy_(tau * local_param + (1 - tau) * target_param)

    class ReplayBuffer:
        """
        Fixed-size buffer to store experience tuples.
        """

        def __init__(self, action_size, buffer_size, batch_size, seed):
            """
            Initialize a ReplayBuffer object.
            Params:
            - action_size (int): dimension of each action
            - buffer_size (int): maximum size of buffer
            - batch_size (int): size of each training batch
            - seed (int): random seed
            """
            self.action_size = action_size
            # Once full, the deque silently discards the oldest experience
            self.memory = deque(maxlen=buffer_size)
            self.batch_size = batch_size
            self.experiences = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])

            # random.seed() returns None, so seed first and keep the value itself.
            random.seed(seed)
            self.seed = seed

        def add(self, state, action, reward, next_state, done):
            """
            Add a new experience to memory.
            """
            self.memory.append(self.experiences(state, action, reward, next_state, done))

        def sample(self):
            """
            Randomly sample a batch of experiences from memory.

            Returns a tuple (states, actions, rewards, next_states, dones) of
            tensors on `device`; actions are int64, everything else float32.
            """
            batch = random.sample(self.memory, k=self.batch_size)

            states = torch.from_numpy(np.vstack([e.state for e in batch])).float().to(device)
            actions = torch.from_numpy(np.vstack([e.action for e in batch])).long().to(device)
            rewards = torch.from_numpy(np.vstack([e.reward for e in batch])).float().to(device)
            next_states = torch.from_numpy(np.vstack([e.next_state for e in batch])).float().to(device)
            # bool -> uint8 -> float so the flag can be used arithmetically in learn()
            dones = torch.from_numpy(np.vstack([e.done for e in batch]).astype(np.uint8)).float().to(device)

            return (states, actions, rewards, next_states, dones)

        def __len__(self):
            """
            Return the current size of internal memory.
            """
            return len(self.memory)

    return Agent(state_size=env.observation_space.shape[0], action_size=env.action_space.n, seed=SEED)

In [8]:
def objective(trial):
    """
    Optuna objective: train a DQN agent on FlappyBird-v0 with sampled hyperparameters.

    Params:
    - trial (optuna.trial.Trial): trial used to sample BUFFER_SIZE, BATCH_SIZE,
      GAMMA, TAU and UPDATE_EVERY

    Returns:
    - float: mean episode score over the last (up to) 100 training episodes
    """
    env = gymnasium.make("FlappyBird-v0")

    # Close the environment even if agent creation or training raises.
    try:
        plt.ion()

        # if gpu is to be used
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(device)

        # Hyperparameters sampled for this trial (integer bounds for suggest_int)
        BUFFER_SIZE = trial.suggest_int("BUFFER_SIZE", 10_000, 1_000_000)
        BATCH_SIZE = trial.suggest_int("BATCH_SIZE", 32, 256)
        GAMMA = trial.suggest_float("GAMMA", 0.8, 0.999)
        TAU = trial.suggest_float("TAU", 1e-4, 1e-2)
        UPDATE_EVERY = trial.suggest_int("UPDATE_EVERY", 1, 10)

        # Init agent
        agent = create_agent(env,BUFFER_SIZE,BATCH_SIZE,GAMMA,TAU,UPDATE_EVERY)
        checkpoint_path = "checkpoints/model_checkpoint"
        checkpoint_dir = "checkpoints"
        os.makedirs(checkpoint_dir, exist_ok=True)

        def DQN(n_episodes=1000, eps_start=0.9, eps_end=0.01, eps_decay=0.995, checkpoint_every=500):
            """
            Deep Q-Learning Training

            Params:
            - n_episodes (int): maximum number of training episodes
            - eps_start (float): initial epsilon for epsilon-greedy exploration
            - eps_end (float): lower bound for epsilon
            - eps_decay (float): multiplicative epsilon decay applied after each episode
            - checkpoint_every (int): save a periodic checkpoint every this many episodes

            Returns the mean of the rolling score window (last 100 episodes).
            """
            scores = []
            scores_window = deque(maxlen=100)  # rolling window of the last 100 scores
            eps = eps_start
            # "Best" checkpoints are only written once the rolling average exceeds this
            best_score = 1

            for i_episode in range(n_episodes):
                state, _ = env.reset()
                score = 0
                episode_over = False

                while not episode_over:
                    action = agent.act(state, eps)
                    next_state, reward, terminated, truncated, _ = env.step(action)
                    # Store only true termination as `done`, so the agent's
                    # Q-target still bootstraps from a truncated episode's last state.
                    agent.step(state, action, reward, next_state, terminated)

                    state = next_state
                    score += reward
                    # A truncated episode must also end the loop, otherwise it never exits.
                    episode_over = terminated or truncated

                scores.append(score)
                scores_window.append(score)
                eps = max(eps * eps_decay, eps_end)  # Decrease eps

                # Save a checkpoint whenever the rolling average sets a new high
                score_avg = np.mean(scores_window)
                if score_avg > best_score:
                    best_score = score_avg
                    torch.save(agent.qnetwork_local.state_dict(), f"{checkpoint_path}_best_{score_avg:.2f}.pth")
                    print(f" New best model saved! Score: {score_avg:.2f}")

                # Save a periodic checkpoint every `checkpoint_every` episodes
                if i_episode % checkpoint_every == 0:
                    torch.save(agent.qnetwork_local.state_dict(), f"{checkpoint_path}_{i_episode}_{score_avg:.2f}.pth")
                    print(f"Checkpoint saved at {time.strftime('%Y-%m-%d %H:%M:%S')}")

                print(f'\rEpisode {i_episode}\tAverage Score: {score_avg:.2f}', end="")

                # Early stop
                if score_avg >= 100:
                    print(f'\n✅ Environment solved in {i_episode} episodes! Avg Score: {score_avg:.2f}')
                    torch.save(agent.qnetwork_local.state_dict(), 'final_model.pth')
                    break

            # Plain Python float for Optuna's trial value
            return float(np.mean(scores_window))

        return DQN()
    finally:
        env.close()

In [None]:
# Persist trials in a local SQLite file so the search survives kernel restarts.
# load_if_exists=True lets this cell be re-run: it resumes the stored study
# instead of raising because "DQN_trials" already exists.
study = optuna.create_study(
    direction="maximize",
    study_name="DQN_trials",
    storage="sqlite:///DQN_trials.db",
    load_if_exists=True,
)
study.optimize(objective, n_trials=100)

print("Best hyperparameters: ", study.best_params)

[I 2025-03-03 07:45:57,063] A new study created in memory with name: no-name-91474039-6fe6-4ae2-aee8-4eedccc72cc3


cuda
cuda


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


Checkpoint saved at 2025-03-03 07:46:00
Episode 499	Average Score: -1.40Checkpoint saved at 2025-03-03 07:46:40
Episode 996	Average Score: -1.39

[I 2025-03-03 07:47:25,071] Trial 0 finished with value: -1.5309999999999997 and parameters: {'BUFFER_SIZE': 146575, 'BATCH_SIZE': 42, 'GAMMA': 0.9378342300559974, 'TAU': 0.003043388258881088, 'UPDATE_EVERY': 5}. Best is trial 0 with value: -1.5309999999999997.


Episode 999	Average Score: -1.53cuda
cuda
Checkpoint saved at 2025-03-03 07:47:25
Episode 499	Average Score: -1.71Checkpoint saved at 2025-03-03 07:48:08
Episode 997	Average Score: -0.35

[I 2025-03-03 07:48:56,925] Trial 1 finished with value: -0.2779999999999997 and parameters: {'BUFFER_SIZE': 610314, 'BATCH_SIZE': 70, 'GAMMA': 0.9928426923965459, 'TAU': 0.0028448637873904436, 'UPDATE_EVERY': 4}. Best is trial 1 with value: -0.2779999999999997.


Episode 999	Average Score: -0.28cuda
cuda
Checkpoint saved at 2025-03-03 07:48:56
Episode 499	Average Score: -2.01Checkpoint saved at 2025-03-03 07:49:35
Episode 996	Average Score: -1.43

[I 2025-03-03 07:50:21,365] Trial 2 finished with value: -1.4890000000000003 and parameters: {'BUFFER_SIZE': 548012, 'BATCH_SIZE': 140, 'GAMMA': 0.839288578382608, 'TAU': 0.002240971212007394, 'UPDATE_EVERY': 8}. Best is trial 1 with value: -0.2779999999999997.


Episode 999	Average Score: -1.49cuda
cuda
Checkpoint saved at 2025-03-03 07:50:21
Episode 499	Average Score: -2.18Checkpoint saved at 2025-03-03 07:51:00
Episode 998	Average Score: -2.17

[I 2025-03-03 07:51:41,831] Trial 3 finished with value: -2.173 and parameters: {'BUFFER_SIZE': 77236, 'BATCH_SIZE': 116, 'GAMMA': 0.8282793410955961, 'TAU': 0.008181295613864688, 'UPDATE_EVERY': 10}. Best is trial 1 with value: -0.2779999999999997.


Episode 999	Average Score: -2.17cuda
cuda
Checkpoint saved at 2025-03-03 07:51:41
Episode 499	Average Score: -2.18Checkpoint saved at 2025-03-03 07:52:28
Episode 999	Average Score: -0.27

[I 2025-03-03 07:53:15,508] Trial 4 finished with value: -0.269999999999999 and parameters: {'BUFFER_SIZE': 994447, 'BATCH_SIZE': 61, 'GAMMA': 0.9272587907462511, 'TAU': 0.009315275660324227, 'UPDATE_EVERY': 3}. Best is trial 4 with value: -0.269999999999999.


cuda
cuda
Checkpoint saved at 2025-03-03 07:53:15
Episode 499	Average Score: -1.52Checkpoint saved at 2025-03-03 07:53:58
Episode 998	Average Score: 0.416

[I 2025-03-03 07:54:45,774] Trial 5 finished with value: 0.43700000000000017 and parameters: {'BUFFER_SIZE': 304051, 'BATCH_SIZE': 213, 'GAMMA': 0.9071174560265539, 'TAU': 0.00902502641130395, 'UPDATE_EVERY': 7}. Best is trial 5 with value: 0.43700000000000017.


Episode 999	Average Score: 0.44cuda
cuda
Checkpoint saved at 2025-03-03 07:54:45
Episode 499	Average Score: -1.67Checkpoint saved at 2025-03-03 07:55:23
Episode 998	Average Score: -1.52

[I 2025-03-03 07:56:03,313] Trial 6 finished with value: -1.4599999999999995 and parameters: {'BUFFER_SIZE': 152069, 'BATCH_SIZE': 44, 'GAMMA': 0.8214151961226296, 'TAU': 0.008741142190488493, 'UPDATE_EVERY': 9}. Best is trial 5 with value: 0.43700000000000017.


Episode 999	Average Score: -1.46cuda
cuda
Checkpoint saved at 2025-03-03 07:56:03
Episode 499	Average Score: -2.97Checkpoint saved at 2025-03-03 07:56:46
Episode 996	Average Score: -1.81

[I 2025-03-03 07:57:31,330] Trial 7 finished with value: -1.8269999999999993 and parameters: {'BUFFER_SIZE': 326620, 'BATCH_SIZE': 199, 'GAMMA': 0.8254747578814889, 'TAU': 0.00034196735233360705, 'UPDATE_EVERY': 6}. Best is trial 5 with value: 0.43700000000000017.


Episode 999	Average Score: -1.83cuda
cuda
Checkpoint saved at 2025-03-03 07:57:31
Episode 499	Average Score: -0.89Checkpoint saved at 2025-03-03 07:58:51
Episode 741	Average Score: 0.981 New best model saved! Score: 1.00
Episode 742	Average Score: 1.00 New best model saved! Score: 1.01
Episode 743	Average Score: 1.01 New best model saved! Score: 1.02
Episode 795	Average Score: 1.00 New best model saved! Score: 1.03
Episode 796	Average Score: 1.03 New best model saved! Score: 1.06
Episode 797	Average Score: 1.06 New best model saved! Score: 1.07
Episode 798	Average Score: 1.07 New best model saved! Score: 1.07
Episode 799	Average Score: 1.07 New best model saved! Score: 1.08
Episode 811	Average Score: 1.07 New best model saved! Score: 1.09
Episode 831	Average Score: 1.07 New best model saved! Score: 1.10
Episode 832	Average Score: 1.10 New best model saved! Score: 1.11
Episode 856	Average Score: 1.10 New best model saved! Score: 1.12
Episode 876	Average Score: 1.11 New best model saved!

[I 2025-03-03 08:00:17,103] Trial 8 finished with value: 1.4229999999999998 and parameters: {'BUFFER_SIZE': 720880, 'BATCH_SIZE': 88, 'GAMMA': 0.9210254139000001, 'TAU': 0.0035125218327885085, 'UPDATE_EVERY': 1}. Best is trial 8 with value: 1.4229999999999998.


Episode 999	Average Score: 1.42cuda
cuda
Checkpoint saved at 2025-03-03 08:00:17
Episode 499	Average Score: -2.27Checkpoint saved at 2025-03-03 08:01:00
Episode 996	Average Score: 0.921

[I 2025-03-03 08:01:50,019] Trial 9 finished with value: 0.9789999999999998 and parameters: {'BUFFER_SIZE': 659518, 'BATCH_SIZE': 170, 'GAMMA': 0.8225902236360965, 'TAU': 0.003277070231951799, 'UPDATE_EVERY': 5}. Best is trial 8 with value: 1.4229999999999998.


 New best model saved! Score: 1.01
Episode 999	Average Score: 0.98cuda
cuda
Checkpoint saved at 2025-03-03 08:01:50
Episode 499	Average Score: -0.77Checkpoint saved at 2025-03-03 08:03:11
Episode 627	Average Score: 0.970 New best model saved! Score: 1.01
Episode 633	Average Score: 1.01 New best model saved! Score: 1.02
Episode 678	Average Score: 0.98 New best model saved! Score: 1.02
Episode 679	Average Score: 1.02 New best model saved! Score: 1.07
Episode 682	Average Score: 1.05 New best model saved! Score: 1.07
Episode 683	Average Score: 1.07 New best model saved! Score: 1.11
Episode 694	Average Score: 1.10 New best model saved! Score: 1.13
Episode 695	Average Score: 1.13 New best model saved! Score: 1.17
Episode 696	Average Score: 1.17 New best model saved! Score: 1.21
Episode 697	Average Score: 1.21 New best model saved! Score: 1.21
Episode 701	Average Score: 1.20 New best model saved! Score: 1.23
Episode 706	Average Score: 1.19 New best model saved! Score: 1.23
Episode 707	Average

[I 2025-03-03 08:04:37,710] Trial 10 finished with value: 1.6069999999999993 and parameters: {'BUFFER_SIZE': 880316, 'BATCH_SIZE': 104, 'GAMMA': 0.8796196369576921, 'TAU': 0.0059089508823979234, 'UPDATE_EVERY': 1}. Best is trial 10 with value: 1.6069999999999993.


Episode 999	Average Score: 1.61cuda
cuda
Checkpoint saved at 2025-03-03 08:04:37
Episode 499	Average Score: -0.53Checkpoint saved at 2025-03-03 08:05:56
Episode 883	Average Score: 0.991 New best model saved! Score: 1.06
Episode 885	Average Score: 1.02 New best model saved! Score: 1.07
Episode 898	Average Score: 1.02 New best model saved! Score: 1.08
Episode 899	Average Score: 1.08 New best model saved! Score: 1.08
Episode 905	Average Score: 1.06 New best model saved! Score: 1.10
Episode 906	Average Score: 1.10 New best model saved! Score: 1.11
Episode 907	Average Score: 1.11 New best model saved! Score: 1.11
Episode 908	Average Score: 1.11 New best model saved! Score: 1.14
Episode 909	Average Score: 1.14 New best model saved! Score: 1.15
Episode 910	Average Score: 1.15 New best model saved! Score: 1.20
Episode 911	Average Score: 1.20 New best model saved! Score: 1.25
Episode 914	Average Score: 1.24 New best model saved! Score: 1.28
Episode 922	Average Score: 1.26 New best model saved! 

[I 2025-03-03 08:07:35,146] Trial 11 finished with value: 2.116999999999997 and parameters: {'BUFFER_SIZE': 855926, 'BATCH_SIZE': 99, 'GAMMA': 0.876286428336331, 'TAU': 0.006149184620515133, 'UPDATE_EVERY': 1}. Best is trial 11 with value: 2.116999999999997.


Episode 999	Average Score: 2.12cuda
cuda
Checkpoint saved at 2025-03-03 08:07:35
Episode 499	Average Score: -0.49Checkpoint saved at 2025-03-03 08:08:56
Episode 709	Average Score: 0.985 New best model saved! Score: 1.01
Episode 712	Average Score: 1.00 New best model saved! Score: 1.04
Episode 714	Average Score: 1.04 New best model saved! Score: 1.13
Episode 715	Average Score: 1.13 New best model saved! Score: 1.17
Episode 718	Average Score: 1.17 New best model saved! Score: 1.21
Episode 721	Average Score: 1.19 New best model saved! Score: 1.23
Episode 725	Average Score: 1.20 New best model saved! Score: 1.23
Episode 726	Average Score: 1.23 New best model saved! Score: 1.29
Episode 728	Average Score: 1.28 New best model saved! Score: 1.33
Episode 729	Average Score: 1.33 New best model saved! Score: 1.40
Episode 730	Average Score: 1.40 New best model saved! Score: 1.40
Episode 731	Average Score: 1.40 New best model saved! Score: 1.46
Episode 732	Average Score: 1.46 New best model saved! 

[I 2025-03-03 08:10:37,973] Trial 12 finished with value: 2.758999999999996 and parameters: {'BUFFER_SIZE': 967209, 'BATCH_SIZE': 112, 'GAMMA': 0.8725301994106025, 'TAU': 0.006337696494609, 'UPDATE_EVERY': 1}. Best is trial 12 with value: 2.758999999999996.


Episode 999	Average Score: 2.76cuda
cuda
Checkpoint saved at 2025-03-03 08:10:38
Episode 499	Average Score: -1.33Checkpoint saved at 2025-03-03 08:11:36
Episode 736	Average Score: 0.971 New best model saved! Score: 1.00
Episode 737	Average Score: 1.00 New best model saved! Score: 1.03
Episode 755	Average Score: 1.03 New best model saved! Score: 1.06
Episode 759	Average Score: 1.04 New best model saved! Score: 1.09
Episode 760	Average Score: 1.09 New best model saved! Score: 1.10
Episode 761	Average Score: 1.10 New best model saved! Score: 1.13
Episode 762	Average Score: 1.13 New best model saved! Score: 1.14
Episode 763	Average Score: 1.14 New best model saved! Score: 1.17
Episode 764	Average Score: 1.17 New best model saved! Score: 1.17
Episode 765	Average Score: 1.17 New best model saved! Score: 1.21
Episode 770	Average Score: 1.19 New best model saved! Score: 1.22
Episode 771	Average Score: 1.22 New best model saved! Score: 1.24
Episode 772	Average Score: 1.24 New best model saved! 

[I 2025-03-03 08:12:37,253] Trial 13 finished with value: 1.5249999999999997 and parameters: {'BUFFER_SIZE': 837748, 'BATCH_SIZE': 144, 'GAMMA': 0.8693192860460718, 'TAU': 0.006659477035136694, 'UPDATE_EVERY': 2}. Best is trial 12 with value: 2.758999999999996.


Episode 999	Average Score: 1.52cuda
cuda
Checkpoint saved at 2025-03-03 08:12:37
Episode 499	Average Score: -2.56Checkpoint saved at 2025-03-03 08:13:25
Episode 965	Average Score: 0.94 New best model saved! Score: 1.04
Episode 966	Average Score: 1.04 New best model saved! Score: 1.07
Episode 967	Average Score: 1.07 New best model saved! Score: 1.11
Episode 968	Average Score: 1.11 New best model saved! Score: 1.12
Episode 991	Average Score: 1.11 New best model saved! Score: 1.13
Episode 995	Average Score: 1.12 New best model saved! Score: 1.15
Episode 996	Average Score: 1.15 New best model saved! Score: 1.16
Episode 998	Average Score: 1.13

[I 2025-03-03 08:14:18,183] Trial 14 finished with value: 1.15 and parameters: {'BUFFER_SIZE': 989931, 'BATCH_SIZE': 121, 'GAMMA': 0.8623562028232696, 'TAU': 0.007010172046349896, 'UPDATE_EVERY': 3}. Best is trial 12 with value: 2.758999999999996.


Episode 999	Average Score: 1.15cuda
cuda
Checkpoint saved at 2025-03-03 08:14:18
Episode 499	Average Score: -0.19Checkpoint saved at 2025-03-03 08:15:56
Episode 694	Average Score: 0.993 New best model saved! Score: 1.01
Episode 695	Average Score: 1.01 New best model saved! Score: 1.02
Episode 696	Average Score: 1.02 New best model saved! Score: 1.03
Episode 697	Average Score: 1.03 New best model saved! Score: 1.05
Episode 698	Average Score: 1.05 New best model saved! Score: 1.06
Episode 707	Average Score: 1.06 New best model saved! Score: 1.07
Episode 708	Average Score: 1.07 New best model saved! Score: 1.09
Episode 709	Average Score: 1.09 New best model saved! Score: 1.13
Episode 710	Average Score: 1.13 New best model saved! Score: 1.15
Episode 711	Average Score: 1.15 New best model saved! Score: 1.19
Episode 718	Average Score: 1.18 New best model saved! Score: 1.21
Episode 780	Average Score: 1.19 New best model saved! Score: 1.23
Episode 788	Average Score: 1.23 New best model saved! 

[I 2025-03-03 08:17:39,609] Trial 15 finished with value: 0.8380000000000005 and parameters: {'BUFFER_SIZE': 794220, 'BATCH_SIZE': 247, 'GAMMA': 0.9532819142168417, 'TAU': 0.004602898084462479, 'UPDATE_EVERY': 1}. Best is trial 12 with value: 2.758999999999996.


cuda
cuda
Checkpoint saved at 2025-03-03 08:17:39
Episode 499	Average Score: -1.79Checkpoint saved at 2025-03-03 08:18:29
Episode 998	Average Score: 0.672

[I 2025-03-03 08:19:24,324] Trial 16 finished with value: 0.6730000000000003 and parameters: {'BUFFER_SIZE': 442671, 'BATCH_SIZE': 161, 'GAMMA': 0.8931409267502656, 'TAU': 0.0047351727630115595, 'UPDATE_EVERY': 3}. Best is trial 12 with value: 2.758999999999996.


Episode 999	Average Score: 0.67cuda
cuda
Checkpoint saved at 2025-03-03 08:19:24
Episode 499	Average Score: -1.27Checkpoint saved at 2025-03-03 08:20:18
Episode 821	Average Score: 1.000 New best model saved! Score: 1.00
Episode 824	Average Score: 0.99 New best model saved! Score: 1.03
Episode 826	Average Score: 1.01 New best model saved! Score: 1.04
Episode 827	Average Score: 1.04 New best model saved! Score: 1.04
Episode 830	Average Score: 1.03 New best model saved! Score: 1.04
Episode 831	Average Score: 1.04 New best model saved! Score: 1.08
Episode 836	Average Score: 1.00 New best model saved! Score: 1.10
Episode 843	Average Score: 1.10 New best model saved! Score: 1.18
Episode 844	Average Score: 1.18 New best model saved! Score: 1.30
Episode 845	Average Score: 1.30 New best model saved! Score: 1.34
Episode 849	Average Score: 1.32 New best model saved! Score: 1.36
Episode 850	Average Score: 1.36 New best model saved! Score: 1.39
Episode 968	Average Score: 1.37 New best model saved! 

[I 2025-03-03 08:21:20,436] Trial 17 finished with value: 1.5739999999999987 and parameters: {'BUFFER_SIZE': 895809, 'BATCH_SIZE': 83, 'GAMMA': 0.8608127233818917, 'TAU': 0.007489739515628198, 'UPDATE_EVERY': 2}. Best is trial 12 with value: 2.758999999999996.


 New best model saved! Score: 1.57
Episode 998	Average Score: 1.57 New best model saved! Score: 1.57
Episode 999	Average Score: 1.57cuda
cuda
Checkpoint saved at 2025-03-03 08:21:20
Episode 499	Average Score: -1.53Checkpoint saved at 2025-03-03 08:22:15
Episode 811	Average Score: 0.99 New best model saved! Score: 1.02
Episode 813	Average Score: 1.02 New best model saved! Score: 1.04
Episode 814	Average Score: 1.04 New best model saved! Score: 1.07
Episode 832	Average Score: 1.05 New best model saved! Score: 1.09
Episode 833	Average Score: 1.09 New best model saved! Score: 1.10
Episode 834	Average Score: 1.10 New best model saved! Score: 1.14
Episode 840	Average Score: 1.11 New best model saved! Score: 1.16
Episode 841	Average Score: 1.16 New best model saved! Score: 1.18
Episode 842	Average Score: 1.18 New best model saved! Score: 1.22
Episode 843	Average Score: 1.22 New best model saved! Score: 1.23
Episode 844	Average Score: 1.23 New best model saved! Score: 1.26
Episode 846	Average 

[I 2025-03-03 08:23:17,336] Trial 18 finished with value: 1.2519999999999996 and parameters: {'BUFFER_SIZE': 729471, 'BATCH_SIZE': 99, 'GAMMA': 0.8524012771086059, 'TAU': 0.005790788129955215, 'UPDATE_EVERY': 2}. Best is trial 12 with value: 2.758999999999996.


Episode 999	Average Score: 1.25cuda
cuda
Checkpoint saved at 2025-03-03 08:23:17
Episode 499	Average Score: -2.43Checkpoint saved at 2025-03-03 08:24:02
Episode 998	Average Score: 0.485

[I 2025-03-03 08:24:55,453] Trial 19 finished with value: 0.4799999999999994 and parameters: {'BUFFER_SIZE': 918617, 'BATCH_SIZE': 123, 'GAMMA': 0.8036495176347118, 'TAU': 0.005794151309036881, 'UPDATE_EVERY': 4}. Best is trial 12 with value: 2.758999999999996.


Episode 999	Average Score: 0.48cuda
cuda
Checkpoint saved at 2025-03-03 08:24:55
Episode 499	Average Score: -1.70Checkpoint saved at 2025-03-03 08:25:42
Episode 996	Average Score: -0.16

[I 2025-03-03 08:26:30,700] Trial 20 finished with value: -0.11099999999999918 and parameters: {'BUFFER_SIZE': 769401, 'BATCH_SIZE': 182, 'GAMMA': 0.8833031265289457, 'TAU': 0.009819470628200743, 'UPDATE_EVERY': 4}. Best is trial 12 with value: 2.758999999999996.


Episode 999	Average Score: -0.11cuda
cuda
Checkpoint saved at 2025-03-03 08:26:30
Episode 499	Average Score: -0.81Checkpoint saved at 2025-03-03 08:27:52
Episode 663	Average Score: 0.962 New best model saved! Score: 1.01
Episode 664	Average Score: 1.01 New best model saved! Score: 1.05
Episode 665	Average Score: 1.05 New best model saved! Score: 1.06
Episode 669	Average Score: 1.00 New best model saved! Score: 1.06
Episode 671	Average Score: 1.06 New best model saved! Score: 1.12
Episode 672	Average Score: 1.12 New best model saved! Score: 1.14
Episode 673	Average Score: 1.14 New best model saved! Score: 1.18
Episode 674	Average Score: 1.18 New best model saved! Score: 1.21
Episode 675	Average Score: 1.21 New best model saved! Score: 1.22
Episode 676	Average Score: 1.22 New best model saved! Score: 1.34
Episode 677	Average Score: 1.34 New best model saved! Score: 1.41
Episode 680	Average Score: 1.37 New best model saved! Score: 1.42
Episode 681	Average Score: 1.42 New best model saved!

[I 2025-03-03 08:29:36,417] Trial 21 finished with value: 1.8739999999999981 and parameters: {'BUFFER_SIZE': 884905, 'BATCH_SIZE': 104, 'GAMMA': 0.8843100213082173, 'TAU': 0.006013990020822354, 'UPDATE_EVERY': 1}. Best is trial 12 with value: 2.758999999999996.


Episode 999	Average Score: 1.87cuda
cuda
Checkpoint saved at 2025-03-03 08:29:36
Episode 499	Average Score: -0.51Checkpoint saved at 2025-03-03 08:30:59
Episode 701	Average Score: 0.991 New best model saved! Score: 1.03
Episode 715	Average Score: 1.01 New best model saved! Score: 1.06
Episode 716	Average Score: 1.06 New best model saved! Score: 1.07
Episode 718	Average Score: 1.06 New best model saved! Score: 1.10
Episode 719	Average Score: 1.10 New best model saved! Score: 1.12
Episode 738	Average Score: 1.12 New best model saved! Score: 1.15
Episode 749	Average Score: 1.15 New best model saved! Score: 1.17
Episode 750	Average Score: 1.17 New best model saved! Score: 1.19
Episode 759	Average Score: 1.19 New best model saved! Score: 1.20
Episode 762	Average Score: 1.19 New best model saved! Score: 1.22
Episode 765	Average Score: 1.21 New best model saved! Score: 1.23
Episode 766	Average Score: 1.23 New best model saved! Score: 1.25
Episode 767	Average Score: 1.25 New best model saved! 

[I 2025-03-03 08:32:26,756] Trial 22 finished with value: 1.0749999999999997 and parameters: {'BUFFER_SIZE': 998701, 'BATCH_SIZE': 141, 'GAMMA': 0.9020001052278676, 'TAU': 0.007733140340740405, 'UPDATE_EVERY': 1}. Best is trial 12 with value: 2.758999999999996.


Episode 999	Average Score: 1.07cuda
cuda
Checkpoint saved at 2025-03-03 08:32:26
Episode 499	Average Score: -1.48Checkpoint saved at 2025-03-03 08:33:22
Episode 869	Average Score: 0.98 New best model saved! Score: 1.06
Episode 870	Average Score: 1.06 New best model saved! Score: 1.07
Episode 878	Average Score: 0.99 New best model saved! Score: 1.08
Episode 879	Average Score: 1.08 New best model saved! Score: 1.16
Episode 881	Average Score: 1.15 New best model saved! Score: 1.19
Episode 904	Average Score: 1.17 New best model saved! Score: 1.20
Episode 905	Average Score: 1.20 New best model saved! Score: 1.21
Episode 906	Average Score: 1.21 New best model saved! Score: 1.22
Episode 910	Average Score: 1.20 New best model saved! Score: 1.26
Episode 924	Average Score: 1.16 New best model saved! Score: 1.28
Episode 925	Average Score: 1.28 New best model saved! Score: 1.28
Episode 928	Average Score: 1.26 New best model saved! Score: 1.34
Episode 932	Average Score: 1.34 New best model saved! S

[I 2025-03-03 08:34:25,237] Trial 23 finished with value: 1.6849999999999983 and parameters: {'BUFFER_SIZE': 849538, 'BATCH_SIZE': 71, 'GAMMA': 0.846973721650347, 'TAU': 0.006416770590197231, 'UPDATE_EVERY': 2}. Best is trial 12 with value: 2.758999999999996.


Episode 999	Average Score: 1.68cuda
cuda
Checkpoint saved at 2025-03-03 08:34:25
Episode 499	Average Score: -0.55Checkpoint saved at 2025-03-03 08:35:44
Episode 654	Average Score: 0.972 New best model saved! Score: 1.03
Episode 655	Average Score: 1.03 New best model saved! Score: 1.03
Episode 683	Average Score: 1.03 New best model saved! Score: 1.05
Episode 686	Average Score: 1.02 New best model saved! Score: 1.05
Episode 694	Average Score: 1.02 New best model saved! Score: 1.06
Episode 695	Average Score: 1.06 New best model saved! Score: 1.09
Episode 696	Average Score: 1.09 New best model saved! Score: 1.12
Episode 702	Average Score: 1.10 New best model saved! Score: 1.12
Episode 703	Average Score: 1.12 New best model saved! Score: 1.16
Episode 707	Average Score: 1.14 New best model saved! Score: 1.20
Episode 709	Average Score: 1.19 New best model saved! Score: 1.23
Episode 710	Average Score: 1.23 New best model saved! Score: 1.27
Episode 712	Average Score: 1.25 New best model saved! 

[I 2025-03-03 08:37:15,660] Trial 24 finished with value: 1.1959999999999997 and parameters: {'BUFFER_SIZE': 922104, 'BATCH_SIZE': 106, 'GAMMA': 0.8767393944585078, 'TAU': 0.004414598901423127, 'UPDATE_EVERY': 1}. Best is trial 12 with value: 2.758999999999996.


Episode 999	Average Score: 1.20cuda
cuda
Checkpoint saved at 2025-03-03 08:37:15
Episode 499	Average Score: -1.85Checkpoint saved at 2025-03-03 08:38:06
Episode 719	Average Score: 0.884 New best model saved! Score: 1.00
Episode 720	Average Score: 1.00 New best model saved! Score: 1.06
Episode 721	Average Score: 1.06 New best model saved! Score: 1.12
Episode 722	Average Score: 1.12 New best model saved! Score: 1.14
Episode 723	Average Score: 1.14 New best model saved! Score: 1.21
Episode 730	Average Score: 1.17 New best model saved! Score: 1.26
Episode 731	Average Score: 1.26 New best model saved! Score: 1.31
Episode 733	Average Score: 1.30 New best model saved! Score: 1.33
Episode 735	Average Score: 1.33 New best model saved! Score: 1.41
Episode 736	Average Score: 1.41 New best model saved! Score: 1.46
Episode 737	Average Score: 1.46 New best model saved! Score: 1.46
Episode 739	Average Score: 1.46 New best model saved! Score: 1.53
Episode 740	Average Score: 1.53 New best model saved! 

[I 2025-03-03 08:39:22,063] Trial 25 finished with value: 4.195999999999997 and parameters: {'BUFFER_SIZE': 657941, 'BATCH_SIZE': 130, 'GAMMA': 0.8930005569554494, 'TAU': 0.005222154546958184, 'UPDATE_EVERY': 3}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 4.20cuda
cuda
Checkpoint saved at 2025-03-03 08:39:22
Episode 499	Average Score: -1.99Checkpoint saved at 2025-03-03 08:40:11
Episode 997	Average Score: 0.665

[I 2025-03-03 08:41:05,612] Trial 26 finished with value: 0.6690000000000002 and parameters: {'BUFFER_SIZE': 663230, 'BATCH_SIZE': 129, 'GAMMA': 0.9142689637761866, 'TAU': 0.005222565897589824, 'UPDATE_EVERY': 3}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 0.67cuda
cuda
Checkpoint saved at 2025-03-03 08:41:05
Episode 499	Average Score: -2.15Checkpoint saved at 2025-03-03 08:42:03
Episode 898	Average Score: 0.99 New best model saved! Score: 1.01
Episode 899	Average Score: 1.01 New best model saved! Score: 1.03
Episode 905	Average Score: 1.02 New best model saved! Score: 1.05
Episode 906	Average Score: 1.05 New best model saved! Score: 1.07
Episode 907	Average Score: 1.07 New best model saved! Score: 1.08
Episode 954	Average Score: 1.05 New best model saved! Score: 1.08
Episode 997	Average Score: 0.91

[I 2025-03-03 08:43:03,722] Trial 27 finished with value: 0.9220000000000005 and parameters: {'BUFFER_SIZE': 573039, 'BATCH_SIZE': 152, 'GAMMA': 0.9549345225685404, 'TAU': 0.004013480459266401, 'UPDATE_EVERY': 2}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 0.92cuda
cuda
Checkpoint saved at 2025-03-03 08:43:03
Episode 499	Average Score: -2.00Checkpoint saved at 2025-03-03 08:43:50
Episode 998	Average Score: -0.42

[I 2025-03-03 08:44:43,012] Trial 28 finished with value: -0.4109999999999999 and parameters: {'BUFFER_SIZE': 469673, 'BATCH_SIZE': 84, 'GAMMA': 0.899094023590701, 'TAU': 0.0019460179016515338, 'UPDATE_EVERY': 3}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: -0.41cuda
cuda
Checkpoint saved at 2025-03-03 08:44:43
Episode 499	Average Score: -1.94Checkpoint saved at 2025-03-03 08:45:22
Episode 998	Average Score: -1.13

[I 2025-03-03 08:46:05,753] Trial 29 finished with value: -1.1409999999999996 and parameters: {'BUFFER_SIZE': 774080, 'BATCH_SIZE': 42, 'GAMMA': 0.9306747956986952, 'TAU': 0.0053240387862797086, 'UPDATE_EVERY': 5}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: -1.14cuda
cuda
Checkpoint saved at 2025-03-03 08:46:05
Episode 499	Average Score: -1.85Checkpoint saved at 2025-03-03 08:46:44
Episode 996	Average Score: 0.474

[I 2025-03-03 08:47:26,387] Trial 30 finished with value: 0.5420000000000003 and parameters: {'BUFFER_SIZE': 662607, 'BATCH_SIZE': 57, 'GAMMA': 0.948563188006432, 'TAU': 0.007476160700136136, 'UPDATE_EVERY': 6}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 0.54cuda
cuda
Checkpoint saved at 2025-03-03 08:47:26
Episode 499	Average Score: -0.61Checkpoint saved at 2025-03-03 08:48:43
Episode 728	Average Score: 0.973 New best model saved! Score: 1.00
Episode 729	Average Score: 1.00 New best model saved! Score: 1.00
Episode 730	Average Score: 1.00 New best model saved! Score: 1.00
Episode 732	Average Score: 0.97 New best model saved! Score: 1.01
Episode 736	Average Score: 1.00 New best model saved! Score: 1.04
Episode 744	Average Score: 1.03 New best model saved! Score: 1.04
Episode 746	Average Score: 1.02 New best model saved! Score: 1.07
Episode 747	Average Score: 1.07 New best model saved! Score: 1.08
Episode 750	Average Score: 1.08 New best model saved! Score: 1.08
Episode 751	Average Score: 1.08 New best model saved! Score: 1.10
Episode 753	Average Score: 1.10 New best model saved! Score: 1.10
Episode 754	Average Score: 1.10 New best model saved! Score: 1.12
Episode 757	Average Score: 1.09 New best model saved! 

[I 2025-03-03 08:50:08,101] Trial 31 finished with value: 1.4010000000000002 and parameters: {'BUFFER_SIZE': 826812, 'BATCH_SIZE': 96, 'GAMMA': 0.8894348665309251, 'TAU': 0.006696152437232448, 'UPDATE_EVERY': 1}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 1.40cuda
cuda
Checkpoint saved at 2025-03-03 08:50:08
Episode 499	Average Score: -1.13Checkpoint saved at 2025-03-03 08:51:06
Episode 696	Average Score: 1.00 New best model saved! Score: 1.00
Episode 697	Average Score: 1.00 New best model saved! Score: 1.04
Episode 699	Average Score: 1.03 New best model saved! Score: 1.04
Episode 704	Average Score: 1.01 New best model saved! Score: 1.04
Episode 705	Average Score: 1.04 New best model saved! Score: 1.05
Episode 737	Average Score: 1.00 New best model saved! Score: 1.10
Episode 743	Average Score: 1.06 New best model saved! Score: 1.10
Episode 749	Average Score: 1.08 New best model saved! Score: 1.11
Episode 750	Average Score: 1.11 New best model saved! Score: 1.12
Episode 753	Average Score: 1.09 New best model saved! Score: 1.13
Episode 930	Average Score: 1.10 New best model saved! Score: 1.13
Episode 931	Average Score: 1.13 New best model saved! Score: 1.14
Episode 932	Average Score: 1.14 New best model saved! S

[I 2025-03-03 08:52:09,470] Trial 32 finished with value: 1.3709999999999996 and parameters: {'BUFFER_SIZE': 934530, 'BATCH_SIZE': 115, 'GAMMA': 0.8735545201867755, 'TAU': 0.006156571948699887, 'UPDATE_EVERY': 2}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 1.37cuda
cuda
Checkpoint saved at 2025-03-03 08:52:09
Episode 499	Average Score: -0.49Checkpoint saved at 2025-03-03 08:53:33
Episode 778	Average Score: 0.95 New best model saved! Score: 1.00
Episode 779	Average Score: 1.00 New best model saved! Score: 1.00
Episode 780	Average Score: 1.00 New best model saved! Score: 1.03
Episode 781	Average Score: 1.03 New best model saved! Score: 1.06
Episode 796	Average Score: 1.04 New best model saved! Score: 1.07
Episode 841	Average Score: 1.05 New best model saved! Score: 1.07
Episode 842	Average Score: 1.07 New best model saved! Score: 1.11
Episode 843	Average Score: 1.11 New best model saved! Score: 1.13
Episode 858	Average Score: 1.11 New best model saved! Score: 1.13
Episode 859	Average Score: 1.13 New best model saved! Score: 1.14
Episode 860	Average Score: 1.14 New best model saved! Score: 1.17
Episode 861	Average Score: 1.17 New best model saved! Score: 1.18
Episode 864	Average Score: 1.17 New best model saved! S

[I 2025-03-03 08:55:02,615] Trial 33 finished with value: 1.3470000000000004 and parameters: {'BUFFER_SIZE': 715574, 'BATCH_SIZE': 127, 'GAMMA': 0.993871646497503, 'TAU': 0.005467281076302771, 'UPDATE_EVERY': 1}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 1.35cuda
cuda
Checkpoint saved at 2025-03-03 08:55:02
Episode 499	Average Score: -0.53Checkpoint saved at 2025-03-03 08:56:00
Episode 998	Average Score: 0.470

[I 2025-03-03 08:57:00,530] Trial 34 finished with value: 0.4780000000000007 and parameters: {'BUFFER_SIZE': 951512, 'BATCH_SIZE': 135, 'GAMMA': 0.974637776379298, 'TAU': 0.007047488763435079, 'UPDATE_EVERY': 2}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 0.48cuda
cuda
Checkpoint saved at 2025-03-03 08:57:00
Episode 499	Average Score: -2.71Checkpoint saved at 2025-03-03 08:57:43
Episode 996	Average Score: -1.50

[I 2025-03-03 08:58:30,201] Trial 35 finished with value: -1.4399999999999997 and parameters: {'BUFFER_SIZE': 874352, 'BATCH_SIZE': 69, 'GAMMA': 0.8413577063879639, 'TAU': 0.008229198252077253, 'UPDATE_EVERY': 4}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: -1.44cuda
cuda
Checkpoint saved at 2025-03-03 08:58:30
Episode 499	Average Score: -2.20Checkpoint saved at 2025-03-03 08:59:19
Episode 998	Average Score: 0.816

[I 2025-03-03 09:00:14,900] Trial 36 finished with value: 0.7629999999999996 and parameters: {'BUFFER_SIZE': 551138, 'BATCH_SIZE': 113, 'GAMMA': 0.85717510279192, 'TAU': 0.005105998548114702, 'UPDATE_EVERY': 3}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 0.76cuda
cuda
Checkpoint saved at 2025-03-03 09:00:14
Episode 499	Average Score: -0.86Checkpoint saved at 2025-03-03 09:01:31
Episode 996	Average Score: 0.99 New best model saved! Score: 1.02
Episode 997	Average Score: 1.02

[I 2025-03-03 09:02:53,732] Trial 37 finished with value: 1.019000000000001 and parameters: {'BUFFER_SIZE': 620855, 'BATCH_SIZE': 91, 'GAMMA': 0.888497691912013, 'TAU': 0.003658618264378406, 'UPDATE_EVERY': 1}. Best is trial 25 with value: 4.195999999999997.


 New best model saved! Score: 1.04
Episode 999	Average Score: 1.02cuda
cuda
Checkpoint saved at 2025-03-03 09:02:53
Episode 499	Average Score: -1.69Checkpoint saved at 2025-03-03 09:03:32
Episode 998	Average Score: -0.16

[I 2025-03-03 09:04:14,185] Trial 38 finished with value: -0.15299999999999947 and parameters: {'BUFFER_SIZE': 794114, 'BATCH_SIZE': 156, 'GAMMA': 0.9115241202720007, 'TAU': 0.004053472771785959, 'UPDATE_EVERY': 7}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: -0.15cuda
cuda
Checkpoint saved at 2025-03-03 09:04:14
Episode 499	Average Score: -2.51Checkpoint saved at 2025-03-03 09:05:07
Episode 912	Average Score: 0.962 New best model saved! Score: 1.08
Episode 928	Average Score: 1.07 New best model saved! Score: 1.11
Episode 952	Average Score: 1.04 New best model saved! Score: 1.16
Episode 953	Average Score: 1.16 New best model saved! Score: 1.20
Episode 955	Average Score: 1.20 New best model saved! Score: 1.23
Episode 959	Average Score: 1.21 New best model saved! Score: 1.24
Episode 965	Average Score: 1.21 New best model saved! Score: 1.25
Episode 966	Average Score: 1.25 New best model saved! Score: 1.31
Episode 967	Average Score: 1.31 New best model saved! Score: 1.34
Episode 968	Average Score: 1.34 New best model saved! Score: 1.38
Episode 969	Average Score: 1.38 New best model saved! Score: 1.40
Episode 970	Average Score: 1.40 New best model saved! Score: 1.43
Episode 971	Average Score: 1.43 New best model saved!

[I 2025-03-03 09:06:07,991] Trial 39 finished with value: 1.495999999999999 and parameters: {'BUFFER_SIZE': 332517, 'BATCH_SIZE': 80, 'GAMMA': 0.8679857539666785, 'TAU': 0.007997309745118019, 'UPDATE_EVERY': 2}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 1.50cuda
cuda
Checkpoint saved at 2025-03-03 09:06:08
Episode 499	Average Score: -1.23Checkpoint saved at 2025-03-03 09:06:45
Episode 997	Average Score: -0.67

[I 2025-03-03 09:07:33,762] Trial 40 finished with value: -0.5280000000000014 and parameters: {'BUFFER_SIZE': 951634, 'BATCH_SIZE': 33, 'GAMMA': 0.8355432186855247, 'TAU': 0.006536075977165619, 'UPDATE_EVERY': 10}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: -0.53cuda
cuda
Checkpoint saved at 2025-03-03 09:07:33
Episode 499	Average Score: -1.80Checkpoint saved at 2025-03-03 09:08:28
Episode 881	Average Score: 0.97 New best model saved! Score: 1.03
Episode 882	Average Score: 1.03 New best model saved! Score: 1.14
Episode 883	Average Score: 1.14 New best model saved! Score: 1.15
Episode 884	Average Score: 1.15 New best model saved! Score: 1.16
Episode 885	Average Score: 1.16 New best model saved! Score: 1.17
Episode 886	Average Score: 1.17 New best model saved! Score: 1.18
Episode 888	Average Score: 1.18 New best model saved! Score: 1.19
Episode 890	Average Score: 1.19 New best model saved! Score: 1.20
Episode 891	Average Score: 1.20 New best model saved! Score: 1.20
Episode 892	Average Score: 1.20 New best model saved! Score: 1.22
Episode 893	Average Score: 1.22 New best model saved! Score: 1.23
Episode 894	Average Score: 1.23 New best model saved! Score: 1.23
Episode 898	Average Score: 1.22 New best model saved! 

[I 2025-03-03 09:09:28,005] Trial 41 finished with value: 0.7849999999999997 and parameters: {'BUFFER_SIZE': 848574, 'BATCH_SIZE': 57, 'GAMMA': 0.8470051652475054, 'TAU': 0.006282149926334927, 'UPDATE_EVERY': 2}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 0.78cuda
cuda
Checkpoint saved at 2025-03-03 09:09:28
Episode 499	Average Score: -1.76Checkpoint saved at 2025-03-03 09:10:49
Episode 902	Average Score: 0.961 New best model saved! Score: 1.00
Episode 921	Average Score: 0.95 New best model saved! Score: 1.00
Episode 922	Average Score: 1.00 New best model saved! Score: 1.04
Episode 923	Average Score: 1.04 New best model saved! Score: 1.04
Episode 924	Average Score: 1.04 New best model saved! Score: 1.05
Episode 925	Average Score: 1.05 New best model saved! Score: 1.09
Episode 935	Average Score: 1.07 New best model saved! Score: 1.18
Episode 936	Average Score: 1.18 New best model saved! Score: 1.20
Episode 948	Average Score: 1.17 New best model saved! Score: 1.21
Episode 949	Average Score: 1.21 New best model saved! Score: 1.24
Episode 950	Average Score: 1.24 New best model saved! Score: 1.24
Episode 954	Average Score: 1.22 New best model saved! Score: 1.25
Episode 972	Average Score: 1.20 New best model saved! 

[I 2025-03-03 09:12:25,471] Trial 42 finished with value: 1.2899999999999998 and parameters: {'BUFFER_SIZE': 746416, 'BATCH_SIZE': 105, 'GAMMA': 0.8095179934504467, 'TAU': 0.007156993480326625, 'UPDATE_EVERY': 1}. Best is trial 25 with value: 4.195999999999997.


Episode 998	Average Score: 1.23 New best model saved! Score: 1.29
Episode 999	Average Score: 1.29cuda
cuda
Checkpoint saved at 2025-03-03 09:12:25
Episode 499	Average Score: -1.63Checkpoint saved at 2025-03-03 09:13:20
Episode 953	Average Score: 0.961 New best model saved! Score: 1.01
Episode 954	Average Score: 1.01 New best model saved! Score: 1.02
Episode 957	Average Score: 1.02 New best model saved! Score: 1.03
Episode 958	Average Score: 1.03 New best model saved! Score: 1.05
Episode 959	Average Score: 1.05 New best model saved! Score: 1.07
Episode 961	Average Score: 1.07 New best model saved! Score: 1.08
Episode 962	Average Score: 1.08 New best model saved! Score: 1.13
Episode 968	Average Score: 1.12 New best model saved! Score: 1.15
Episode 973	Average Score: 1.14 New best model saved! Score: 1.15
Episode 977	Average Score: 1.11 New best model saved! Score: 1.16
Episode 997	Average Score: 1.04

[I 2025-03-03 09:14:18,257] Trial 43 finished with value: 1.0450000000000002 and parameters: {'BUFFER_SIZE': 822243, 'BATCH_SIZE': 70, 'GAMMA': 0.8963632809135514, 'TAU': 0.005785290178001481, 'UPDATE_EVERY': 2}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 1.05cuda
cuda
Checkpoint saved at 2025-03-03 09:14:18
Episode 499	Average Score: -0.28Checkpoint saved at 2025-03-03 09:15:36
Episode 697	Average Score: 0.99 New best model saved! Score: 1.01
Episode 698	Average Score: 1.01 New best model saved! Score: 1.03
Episode 699	Average Score: 1.03 New best model saved! Score: 1.03
Episode 700	Average Score: 1.03 New best model saved! Score: 1.09
Episode 701	Average Score: 1.09 New best model saved! Score: 1.12
Episode 714	Average Score: 1.10 New best model saved! Score: 1.14
Episode 715	Average Score: 1.14 New best model saved! Score: 1.14
Episode 716	Average Score: 1.14 New best model saved! Score: 1.18
Episode 717	Average Score: 1.18 New best model saved! Score: 1.20
Episode 956	Average Score: 1.20 New best model saved! Score: 1.21
Episode 960	Average Score: 1.19 New best model saved! Score: 1.23
Episode 961	Average Score: 1.23 New best model saved! Score: 1.25
Episode 962	Average Score: 1.25 New best model saved! S

[I 2025-03-03 09:17:05,459] Trial 44 finished with value: 1.6959999999999982 and parameters: {'BUFFER_SIZE': 861969, 'BATCH_SIZE': 78, 'GAMMA': 0.8501473367075887, 'TAU': 0.006296363429786361, 'UPDATE_EVERY': 1}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 1.70cuda
cuda
Checkpoint saved at 2025-03-03 09:17:05
Episode 499	Average Score: 0.032Checkpoint saved at 2025-03-03 09:18:28
Episode 565	Average Score: 0.99 New best model saved! Score: 1.03
Episode 566	Average Score: 1.03 New best model saved! Score: 1.06
Episode 567	Average Score: 1.06 New best model saved! Score: 1.09
Episode 568	Average Score: 1.09 New best model saved! Score: 1.10
Episode 569	Average Score: 1.10 New best model saved! Score: 1.12
Episode 570	Average Score: 1.12 New best model saved! Score: 1.16
Episode 571	Average Score: 1.16 New best model saved! Score: 1.21
Episode 572	Average Score: 1.21 New best model saved! Score: 1.25
Episode 573	Average Score: 1.25 New best model saved! Score: 1.28
Episode 575	Average Score: 1.28 New best model saved! Score: 1.29
Episode 577	Average Score: 1.29 New best model saved! Score: 1.33
Episode 578	Average Score: 1.33 New best model saved! Score: 1.37
Episode 624	Average Score: 1.36 New best model saved! S

[I 2025-03-03 09:20:06,819] Trial 45 finished with value: 2.7019999999999964 and parameters: {'BUFFER_SIZE': 881899, 'BATCH_SIZE': 113, 'GAMMA': 0.8819270729451355, 'TAU': 0.004937381764965804, 'UPDATE_EVERY': 1}. Best is trial 25 with value: 4.195999999999997.


 New best model saved! Score: 2.70
Episode 998	Average Score: 2.70 New best model saved! Score: 2.70
Episode 999	Average Score: 2.70cuda
cuda
Checkpoint saved at 2025-03-03 09:20:06
Episode 499	Average Score: -1.63Checkpoint saved at 2025-03-03 09:20:54
Episode 898	Average Score: 0.931 New best model saved! Score: 1.01
Episode 899	Average Score: 1.01 New best model saved! Score: 1.02
Episode 902	Average Score: 1.00 New best model saved! Score: 1.03
Episode 905	Average Score: 1.01 New best model saved! Score: 1.04
Episode 917	Average Score: 1.01 New best model saved! Score: 1.06
Episode 918	Average Score: 1.06 New best model saved! Score: 1.09
Episode 919	Average Score: 1.09 New best model saved! Score: 1.13
Episode 921	Average Score: 1.08 New best model saved! Score: 1.17
Episode 922	Average Score: 1.17 New best model saved! Score: 1.19
Episode 923	Average Score: 1.19 New best model saved! Score: 1.21
Episode 925	Average Score: 1.21 New best model saved! Score: 1.23
Episode 930	Average

[I 2025-03-03 09:21:48,442] Trial 46 finished with value: 0.8600000000000001 and parameters: {'BUFFER_SIZE': 968590, 'BATCH_SIZE': 113, 'GAMMA': 0.8835325839347465, 'TAU': 0.0029767573537967397, 'UPDATE_EVERY': 3}. Best is trial 25 with value: 4.195999999999997.


cuda
cuda
Checkpoint saved at 2025-03-03 09:21:48
Episode 499	Average Score: -0.13Checkpoint saved at 2025-03-03 09:23:11
Episode 878	Average Score: 1.000 New best model saved! Score: 1.03
Episode 998	Average Score: 0.85

[I 2025-03-03 09:25:00,312] Trial 47 finished with value: 0.8480000000000001 and parameters: {'BUFFER_SIZE': 907433, 'BATCH_SIZE': 133, 'GAMMA': 0.9194795951845274, 'TAU': 0.0025203819182827896, 'UPDATE_EVERY': 1}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 0.85cuda
cuda
Checkpoint saved at 2025-03-03 09:25:00
Episode 499	Average Score: -1.95Checkpoint saved at 2025-03-03 09:25:46
Episode 996	Average Score: 0.554

[I 2025-03-03 09:26:37,408] Trial 48 finished with value: 0.5460000000000003 and parameters: {'BUFFER_SIZE': 35225, 'BATCH_SIZE': 172, 'GAMMA': 0.9032566256113828, 'TAU': 0.004897698479655626, 'UPDATE_EVERY': 4}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 0.55cuda
cuda
Checkpoint saved at 2025-03-03 09:26:37
Episode 499	Average Score: -0.49Checkpoint saved at 2025-03-03 09:28:01
Episode 763	Average Score: 0.962 New best model saved! Score: 1.01
Episode 764	Average Score: 1.01 New best model saved! Score: 1.02
Episode 765	Average Score: 1.02 New best model saved! Score: 1.07
Episode 766	Average Score: 1.07 New best model saved! Score: 1.11
Episode 769	Average Score: 1.10 New best model saved! Score: 1.12
Episode 773	Average Score: 1.09 New best model saved! Score: 1.13
Episode 775	Average Score: 1.11 New best model saved! Score: 1.14
Episode 776	Average Score: 1.14 New best model saved! Score: 1.16
Episode 777	Average Score: 1.16 New best model saved! Score: 1.20
Episode 778	Average Score: 1.20 New best model saved! Score: 1.20
Episode 781	Average Score: 1.19 New best model saved! Score: 1.22
Episode 784	Average Score: 1.22 New best model saved! Score: 1.24
Episode 786	Average Score: 1.23 New best model saved! 

[I 2025-03-03 09:29:44,766] Trial 49 finished with value: 1.6749999999999998 and parameters: {'BUFFER_SIZE': 390002, 'BATCH_SIZE': 148, 'GAMMA': 0.8700275528757849, 'TAU': 0.005462529831074198, 'UPDATE_EVERY': 1}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 1.67cuda
cuda
Checkpoint saved at 2025-03-03 09:29:44
Episode 499	Average Score: -2.32Checkpoint saved at 2025-03-03 09:30:33
Episode 998	Average Score: 0.752

[I 2025-03-03 09:31:40,425] Trial 50 finished with value: 0.7639999999999967 and parameters: {'BUFFER_SIZE': 153781, 'BATCH_SIZE': 94, 'GAMMA': 0.8634279415591586, 'TAU': 0.0008216809160210481, 'UPDATE_EVERY': 3}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 0.76cuda
cuda
Checkpoint saved at 2025-03-03 09:31:40
Episode 499	Average Score: 0.030Checkpoint saved at 2025-03-03 09:32:58
Episode 615	Average Score: 0.971 New best model saved! Score: 1.04
Episode 616	Average Score: 1.04 New best model saved! Score: 1.05
Episode 682	Average Score: 1.01 New best model saved! Score: 1.07
Episode 683	Average Score: 1.07 New best model saved! Score: 1.11
Episode 685	Average Score: 1.09 New best model saved! Score: 1.12
Episode 686	Average Score: 1.12 New best model saved! Score: 1.13
Episode 689	Average Score: 1.12 New best model saved! Score: 1.14
Episode 691	Average Score: 1.13 New best model saved! Score: 1.15
Episode 692	Average Score: 1.15 New best model saved! Score: 1.16
Episode 705	Average Score: 1.12 New best model saved! Score: 1.16
Episode 708	Average Score: 1.14 New best model saved! Score: 1.16
Episode 709	Average Score: 1.16 New best model saved! Score: 1.22
Episode 712	Average Score: 1.22 New best model saved! 

[I 2025-03-03 09:34:30,791] Trial 51 finished with value: 1.3339999999999994 and parameters: {'BUFFER_SIZE': 881405, 'BATCH_SIZE': 107, 'GAMMA': 0.8833118405980956, 'TAU': 0.004311894177259603, 'UPDATE_EVERY': 1}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 1.33cuda
cuda
Checkpoint saved at 2025-03-03 09:34:30
Episode 499	Average Score: 0.121Checkpoint saved at 2025-03-03 09:35:48
Episode 622	Average Score: 0.99 New best model saved! Score: 1.02
Episode 623	Average Score: 1.02 New best model saved! Score: 1.08
Episode 624	Average Score: 1.08 New best model saved! Score: 1.12
Episode 628	Average Score: 1.10 New best model saved! Score: 1.14
Episode 630	Average Score: 1.14 New best model saved! Score: 1.17
Episode 631	Average Score: 1.17 New best model saved! Score: 1.18
Episode 634	Average Score: 1.16 New best model saved! Score: 1.19
Episode 635	Average Score: 1.19 New best model saved! Score: 1.24
Episode 636	Average Score: 1.24 New best model saved! Score: 1.28
Episode 637	Average Score: 1.28 New best model saved! Score: 1.33
Episode 638	Average Score: 1.33 New best model saved! Score: 1.34
Episode 639	Average Score: 1.34 New best model saved! Score: 1.44
Episode 641	Average Score: 1.41 New best model saved! S

[I 2025-03-03 09:37:22,209] Trial 52 finished with value: 1.8859999999999986 and parameters: {'BUFFER_SIZE': 714485, 'BATCH_SIZE': 80, 'GAMMA': 0.8532747865428364, 'TAU': 0.006779485309738886, 'UPDATE_EVERY': 1}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: 1.89cuda
cuda
Checkpoint saved at 2025-03-03 09:37:22
Episode 499	Average Score: -1.68Checkpoint saved at 2025-03-03 09:38:01
Episode 997	Average Score: -0.21

[I 2025-03-03 09:38:43,695] Trial 53 finished with value: -0.15199999999999958 and parameters: {'BUFFER_SIZE': 617930, 'BATCH_SIZE': 123, 'GAMMA': 0.8744144681269809, 'TAU': 0.0068514415077495245, 'UPDATE_EVERY': 9}. Best is trial 25 with value: 4.195999999999997.


Episode 999	Average Score: -0.15cuda
cuda
Checkpoint saved at 2025-03-03 09:38:43
Episode 499	Average Score: -0.20Checkpoint saved at 2025-03-03 09:39:40
Episode 686	Average Score: 1.002 New best model saved! Score: 1.05
Episode 687	Average Score: 1.05 New best model saved! Score: 1.09
Episode 688	Average Score: 1.09 New best model saved! Score: 1.12
Episode 692	Average Score: 1.11 New best model saved! Score: 1.16
Episode 693	Average Score: 1.16 New best model saved! Score: 1.26
Episode 694	Average Score: 1.26 New best model saved! Score: 1.29
Episode 695	Average Score: 1.29 New best model saved! Score: 1.40
Episode 696	Average Score: 1.40 New best model saved! Score: 1.45
Episode 700	Average Score: 1.44 New best model saved! Score: 1.50
Episode 702	Average Score: 1.48 New best model saved! Score: 1.50
Episode 704	Average Score: 1.50 New best model saved! Score: 1.52
Episode 705	Average Score: 1.52 New best model saved! Score: 1.54
Episode 706	Average Score: 1.54 New best model saved!

[I 2025-03-03 09:41:00,804] Trial 54 finished with value: 4.479999999999996 and parameters: {'BUFFER_SIZE': 697736, 'BATCH_SIZE': 96, 'GAMMA': 0.8594071260082985, 'TAU': 0.005650809854995951, 'UPDATE_EVERY': 2}. Best is trial 54 with value: 4.479999999999996.


Episode 999	Average Score: 4.48cuda
cuda
Checkpoint saved at 2025-03-03 09:41:00
Episode 499	Average Score: -1.42Checkpoint saved at 2025-03-03 09:41:59
Episode 883	Average Score: 0.933 New best model saved! Score: 1.04
Episode 884	Average Score: 1.04 New best model saved! Score: 1.05
Episode 891	Average Score: 1.05 New best model saved! Score: 1.14
Episode 911	Average Score: 1.14 New best model saved! Score: 1.19
Episode 912	Average Score: 1.19 New best model saved! Score: 1.19
Episode 913	Average Score: 1.19 New best model saved! Score: 1.20
Episode 916	Average Score: 1.19 New best model saved! Score: 1.22
Episode 945	Average Score: 1.20 New best model saved! Score: 1.23
Episode 946	Average Score: 1.23 New best model saved! Score: 1.24
Episode 947	Average Score: 1.24 New best model saved! Score: 1.26
Episode 961	Average Score: 1.21 New best model saved! Score: 1.28
Episode 962	Average Score: 1.28 New best model saved! Score: 1.38
Episode 997	Average Score: 1.10

[I 2025-03-03 09:42:57,024] Trial 55 finished with value: 1.2160000000000002 and parameters: {'BUFFER_SIZE': 716348, 'BATCH_SIZE': 117, 'GAMMA': 0.8338718247262619, 'TAU': 0.004803227100802942, 'UPDATE_EVERY': 2}. Best is trial 54 with value: 4.479999999999996.


Episode 999	Average Score: 1.22cuda
cuda
Checkpoint saved at 2025-03-03 09:42:57
Episode 499	Average Score: -1.95Checkpoint saved at 2025-03-03 09:43:50
Episode 997	Average Score: 0.341

[I 2025-03-03 09:44:48,887] Trial 56 finished with value: 0.3940000000000006 and parameters: {'BUFFER_SIZE': 679792, 'BATCH_SIZE': 89, 'GAMMA': 0.855653471844737, 'TAU': 0.0057462400278435, 'UPDATE_EVERY': 2}. Best is trial 54 with value: 4.479999999999996.


Episode 999	Average Score: 0.39cuda
cuda
Checkpoint saved at 2025-03-03 09:44:48
Episode 499	Average Score: -1.59Checkpoint saved at 2025-03-03 09:45:42
Episode 934	Average Score: 1.00 New best model saved! Score: 1.04
Episode 936	Average Score: 1.04 New best model saved! Score: 1.06
Episode 937	Average Score: 1.06 New best model saved! Score: 1.07
Episode 941	Average Score: 1.04 New best model saved! Score: 1.12
Episode 942	Average Score: 1.12 New best model saved! Score: 1.13
Episode 948	Average Score: 1.12 New best model saved! Score: 1.14
Episode 954	Average Score: 1.11 New best model saved! Score: 1.15
Episode 955	Average Score: 1.15 New best model saved! Score: 1.16
Episode 957	Average Score: 1.13 New best model saved! Score: 1.19
Episode 959	Average Score: 1.16 New best model saved! Score: 1.19
Episode 960	Average Score: 1.19 New best model saved! Score: 1.22
Episode 961	Average Score: 1.22 New best model saved! Score: 1.24
Episode 977	Average Score: 1.23 New best model saved! S

[I 2025-03-03 09:46:53,540] Trial 57 finished with value: 1.4719999999999984 and parameters: {'BUFFER_SIZE': 589609, 'BATCH_SIZE': 100, 'GAMMA': 0.8665501283941811, 'TAU': 0.00849323645517278, 'UPDATE_EVERY': 2}. Best is trial 54 with value: 4.479999999999996.


Episode 998	Average Score: 1.46 New best model saved! Score: 1.47
Episode 999	Average Score: 1.47cuda
cuda
Checkpoint saved at 2025-03-03 09:46:53
Episode 499	Average Score: -0.70Checkpoint saved at 2025-03-03 09:48:05
Episode 776	Average Score: 0.97 New best model saved! Score: 1.04
Episode 778	Average Score: 1.03 New best model saved! Score: 1.08
Episode 781	Average Score: 1.08 New best model saved! Score: 1.09
Episode 784	Average Score: 1.07 New best model saved! Score: 1.15
Episode 785	Average Score: 1.15 New best model saved! Score: 1.19
Episode 786	Average Score: 1.19 New best model saved! Score: 1.21
Episode 791	Average Score: 1.19 New best model saved! Score: 1.22
Episode 792	Average Score: 1.22 New best model saved! Score: 1.25
Episode 794	Average Score: 1.25 New best model saved! Score: 1.26
Episode 797	Average Score: 1.26 New best model saved! Score: 1.28
Episode 798	Average Score: 1.28 New best model saved! Score: 1.31
Episode 802	Average Score: 1.29 New best model saved! S

[I 2025-03-03 09:49:28,120] Trial 58 finished with value: 0.38300000000000034 and parameters: {'BUFFER_SIZE': 533944, 'BATCH_SIZE': 64, 'GAMMA': 0.8422299034538261, 'TAU': 0.007226926878083599, 'UPDATE_EVERY': 1}. Best is trial 54 with value: 4.479999999999996.


Episode 999	Average Score: 0.38cuda
cuda
Checkpoint saved at 2025-03-03 09:49:28
Episode 499	Average Score: -1.05Checkpoint saved at 2025-03-03 09:50:22
Episode 757	Average Score: 0.961 New best model saved! Score: 1.00
Episode 761	Average Score: 1.00 New best model saved! Score: 1.03
Episode 762	Average Score: 1.03 New best model saved! Score: 1.11
Episode 772	Average Score: 1.09 New best model saved! Score: 1.14
Episode 773	Average Score: 1.14 New best model saved! Score: 1.19
Episode 774	Average Score: 1.19 New best model saved! Score: 1.23
Episode 775	Average Score: 1.23 New best model saved! Score: 1.25
Episode 777	Average Score: 1.23 New best model saved! Score: 1.26
Episode 778	Average Score: 1.26 New best model saved! Score: 1.29
Episode 779	Average Score: 1.29 New best model saved! Score: 1.32
Episode 780	Average Score: 1.32 New best model saved! Score: 1.36
Episode 787	Average Score: 1.29 New best model saved! Score: 1.36
Episode 788	Average Score: 1.36 New best model saved! 

[I 2025-03-03 09:51:43,798] Trial 59 finished with value: 3.5969999999999964 and parameters: {'BUFFER_SIZE': 794520, 'BATCH_SIZE': 249, 'GAMMA': 0.8762477873477194, 'TAU': 0.005489426832999607, 'UPDATE_EVERY': 3}. Best is trial 54 with value: 4.479999999999996.


Episode 999	Average Score: 3.60cuda
cuda
Checkpoint saved at 2025-03-03 09:51:43
Episode 499	Average Score: -1.91Checkpoint saved at 2025-03-03 09:52:33
Episode 997	Average Score: -0.11

[I 2025-03-03 09:53:24,542] Trial 60 finished with value: -0.13099999999999914 and parameters: {'BUFFER_SIZE': 805838, 'BATCH_SIZE': 254, 'GAMMA': 0.8938957977489891, 'TAU': 0.004489694820735728, 'UPDATE_EVERY': 4}. Best is trial 54 with value: 4.479999999999996.


Episode 999	Average Score: -0.13cuda
cuda
Checkpoint saved at 2025-03-03 09:53:24
Episode 499	Average Score: -1.35Checkpoint saved at 2025-03-03 09:54:18
Episode 701	Average Score: 0.97 New best model saved! Score: 1.01
Episode 702	Average Score: 1.01 New best model saved! Score: 1.02
Episode 703	Average Score: 1.02 New best model saved! Score: 1.03
Episode 704	Average Score: 1.03 New best model saved! Score: 1.03
Episode 705	Average Score: 1.03 New best model saved! Score: 1.05
Episode 734	Average Score: 1.05 New best model saved! Score: 1.10
Episode 735	Average Score: 1.10 New best model saved! Score: 1.12
Episode 736	Average Score: 1.12 New best model saved! Score: 1.12
Episode 762	Average Score: 1.11 New best model saved! Score: 1.14
Episode 764	Average Score: 1.14 New best model saved! Score: 1.16
Episode 765	Average Score: 1.16 New best model saved! Score: 1.18
Episode 766	Average Score: 1.18 New best model saved! Score: 1.24
Episode 767	Average Score: 1.24 New best model saved! 

[I 2025-03-03 09:55:48,095] Trial 61 finished with value: 5.441999999999986 and parameters: {'BUFFER_SIZE': 757624, 'BATCH_SIZE': 228, 'GAMMA': 0.8777525034865641, 'TAU': 0.005566248249524419, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 5.44cuda
cuda
Checkpoint saved at 2025-03-03 09:55:48
Episode 499	Average Score: -2.35Checkpoint saved at 2025-03-03 09:56:40
Episode 833	Average Score: 0.974 New best model saved! Score: 1.01
Episode 834	Average Score: 1.01 New best model saved! Score: 1.01
Episode 835	Average Score: 1.01 New best model saved! Score: 1.03
Episode 836	Average Score: 1.03 New best model saved! Score: 1.03
Episode 837	Average Score: 1.03 New best model saved! Score: 1.05
Episode 839	Average Score: 1.04 New best model saved! Score: 1.07
Episode 840	Average Score: 1.07 New best model saved! Score: 1.09
Episode 846	Average Score: 1.08 New best model saved! Score: 1.19
Episode 859	Average Score: 1.16 New best model saved! Score: 1.20
Episode 860	Average Score: 1.20 New best model saved! Score: 1.22
Episode 874	Average Score: 1.22 New best model saved! Score: 1.24
Episode 878	Average Score: 1.24 New best model saved! Score: 1.26
Episode 879	Average Score: 1.26 New best model saved! 

[I 2025-03-03 09:57:37,991] Trial 62 finished with value: 1.103 and parameters: {'BUFFER_SIZE': 752109, 'BATCH_SIZE': 226, 'GAMMA': 0.8751766299665077, 'TAU': 0.005574771426645019, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.10cuda
cuda
Checkpoint saved at 2025-03-03 09:57:38
Episode 499	Average Score: -1.49Checkpoint saved at 2025-03-03 09:58:31
Episode 744	Average Score: 0.945 New best model saved! Score: 1.08
Episode 745	Average Score: 1.08 New best model saved! Score: 1.26
Episode 746	Average Score: 1.26 New best model saved! Score: 1.29
Episode 747	Average Score: 1.29 New best model saved! Score: 1.32
Episode 748	Average Score: 1.32 New best model saved! Score: 1.38
Episode 750	Average Score: 1.36 New best model saved! Score: 1.39
Episode 753	Average Score: 1.34 New best model saved! Score: 1.41
Episode 754	Average Score: 1.41 New best model saved! Score: 1.53
Episode 755	Average Score: 1.53 New best model saved! Score: 1.58
Episode 757	Average Score: 1.49 New best model saved! Score: 1.69
Episode 759	Average Score: 1.66 New best model saved! Score: 1.78
Episode 761	Average Score: 1.76 New best model saved! Score: 1.79
Episode 762	Average Score: 1.79 New best model saved! 

[I 2025-03-03 10:00:04,245] Trial 63 finished with value: 3.54499999999999 and parameters: {'BUFFER_SIZE': 775441, 'BATCH_SIZE': 194, 'GAMMA': 0.8787888227084641, 'TAU': 0.005093029288545303, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 3.54cuda
cuda
Checkpoint saved at 2025-03-03 10:00:04
Episode 499	Average Score: -1.36Checkpoint saved at 2025-03-03 10:00:50
Episode 998	Average Score: 0.763

[I 2025-03-03 10:01:39,308] Trial 64 finished with value: 0.7749999999999999 and parameters: {'BUFFER_SIZE': 783237, 'BATCH_SIZE': 230, 'GAMMA': 0.9060866506642354, 'TAU': 0.005120716145510213, 'UPDATE_EVERY': 5}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 0.77cuda
cuda
Checkpoint saved at 2025-03-03 10:01:39
Episode 499	Average Score: -1.69Checkpoint saved at 2025-03-03 10:02:28
Episode 934	Average Score: 0.982 New best model saved! Score: 1.02
Episode 935	Average Score: 1.02 New best model saved! Score: 1.05
Episode 938	Average Score: 1.04 New best model saved! Score: 1.07
Episode 942	Average Score: 1.05 New best model saved! Score: 1.08
Episode 998	Average Score: 0.47

[I 2025-03-03 10:03:18,295] Trial 65 finished with value: 0.4550000000000008 and parameters: {'BUFFER_SIZE': 675633, 'BATCH_SIZE': 232, 'GAMMA': 0.8885858570306441, 'TAU': 0.0041266322243693745, 'UPDATE_EVERY': 4}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 0.46cuda
cuda
Checkpoint saved at 2025-03-03 10:03:18
Episode 499	Average Score: -2.37Checkpoint saved at 2025-03-03 10:04:11
Episode 865	Average Score: 0.97 New best model saved! Score: 1.00
Episode 866	Average Score: 1.00 New best model saved! Score: 1.02
Episode 867	Average Score: 1.02 New best model saved! Score: 1.03
Episode 871	Average Score: 1.01 New best model saved! Score: 1.06
Episode 958	Average Score: 1.04 New best model saved! Score: 1.09
Episode 962	Average Score: 1.05 New best model saved! Score: 1.10
Episode 964	Average Score: 1.10 New best model saved! Score: 1.11
Episode 965	Average Score: 1.11 New best model saved! Score: 1.11
Episode 966	Average Score: 1.11 New best model saved! Score: 1.13
Episode 968	Average Score: 1.13 New best model saved! Score: 1.14
Episode 975	Average Score: 1.11 New best model saved! Score: 1.15
Episode 977	Average Score: 1.12 New best model saved! Score: 1.18
Episode 978	Average Score: 1.18 New best model saved! S

[I 2025-03-03 10:05:12,852] Trial 66 finished with value: 1.3289999999999988 and parameters: {'BUFFER_SIZE': 752135, 'BATCH_SIZE': 210, 'GAMMA': 0.8793694778873652, 'TAU': 0.0036869615090673086, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.33cuda
cuda
Checkpoint saved at 2025-03-03 10:05:12
Episode 499	Average Score: -1.98Checkpoint saved at 2025-03-03 10:06:07
Episode 914	Average Score: 0.991 New best model saved! Score: 1.00
Episode 915	Average Score: 1.00 New best model saved! Score: 1.04
Episode 916	Average Score: 1.04 New best model saved! Score: 1.07
Episode 918	Average Score: 1.07 New best model saved! Score: 1.10
Episode 919	Average Score: 1.10 New best model saved! Score: 1.21
Episode 923	Average Score: 1.18 New best model saved! Score: 1.22
Episode 924	Average Score: 1.22 New best model saved! Score: 1.22
Episode 925	Average Score: 1.22 New best model saved! Score: 1.24
Episode 927	Average Score: 1.23 New best model saved! Score: 1.27
Episode 928	Average Score: 1.27 New best model saved! Score: 1.27
Episode 929	Average Score: 1.27 New best model saved! Score: 1.31
Episode 936	Average Score: 1.28 New best model saved! Score: 1.38
Episode 940	Average Score: 1.33 New best model saved! 

[I 2025-03-03 10:07:11,807] Trial 67 finished with value: 1.2609999999999995 and parameters: {'BUFFER_SIZE': 689777, 'BATCH_SIZE': 239, 'GAMMA': 0.8587921667505485, 'TAU': 0.004793859091981989, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.26cuda
cuda
Checkpoint saved at 2025-03-03 10:07:11
Episode 499	Average Score: -1.71Checkpoint saved at 2025-03-03 10:07:56
Episode 997	Average Score: -0.32

[I 2025-03-03 10:08:43,234] Trial 68 finished with value: -0.3189999999999994 and parameters: {'BUFFER_SIZE': 643729, 'BATCH_SIZE': 192, 'GAMMA': 0.8975416034440314, 'TAU': 0.006015578734730285, 'UPDATE_EVERY': 5}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: -0.32cuda
cuda
Checkpoint saved at 2025-03-03 10:08:43
Episode 499	Average Score: -1.57Checkpoint saved at 2025-03-03 10:09:38
Episode 916	Average Score: 0.973 New best model saved! Score: 1.00
Episode 917	Average Score: 1.00 New best model saved! Score: 1.04
Episode 921	Average Score: 1.04 New best model saved! Score: 1.06
Episode 931	Average Score: 1.06 New best model saved! Score: 1.09
Episode 936	Average Score: 1.07 New best model saved! Score: 1.09
Episode 937	Average Score: 1.09 New best model saved! Score: 1.16
Episode 938	Average Score: 1.16 New best model saved! Score: 1.18
Episode 940	Average Score: 1.18 New best model saved! Score: 1.21
Episode 941	Average Score: 1.21 New best model saved! Score: 1.24
Episode 942	Average Score: 1.24 New best model saved! Score: 1.27
Episode 943	Average Score: 1.27 New best model saved! Score: 1.28
Episode 946	Average Score: 1.28 New best model saved! Score: 1.29
Episode 947	Average Score: 1.29 New best model saved!

[I 2025-03-03 10:10:42,310] Trial 69 finished with value: 2.3419999999999974 and parameters: {'BUFFER_SIZE': 823697, 'BATCH_SIZE': 245, 'GAMMA': 0.8641107125080826, 'TAU': 0.0052751927012502425, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 2.34cuda
cuda
Checkpoint saved at 2025-03-03 10:10:42
Episode 499	Average Score: -0.84Checkpoint saved at 2025-03-03 10:11:45
Episode 997	Average Score: 0.401

[I 2025-03-03 10:13:01,923] Trial 70 finished with value: 0.42100000000000093 and parameters: {'BUFFER_SIZE': 986614, 'BATCH_SIZE': 204, 'GAMMA': 0.8903925653222208, 'TAU': 0.005572624058681535, 'UPDATE_EVERY': 2}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 0.42cuda
cuda
Checkpoint saved at 2025-03-03 10:13:02
Episode 499	Average Score: -1.88Checkpoint saved at 2025-03-03 10:13:56
Episode 986	Average Score: 0.99 New best model saved! Score: 1.02
Episode 990	Average Score: 0.98 New best model saved! Score: 1.03
Episode 992	Average Score: 1.02 New best model saved! Score: 1.03
Episode 993	Average Score: 1.03 New best model saved! Score: 1.04
Episode 994	Average Score: 1.04 New best model saved! Score: 1.04
Episode 995	Average Score: 1.04 New best model saved! Score: 1.05
Episode 997	Average Score: 1.04

[I 2025-03-03 10:14:58,334] Trial 71 finished with value: 1.0610000000000002 and parameters: {'BUFFER_SIZE': 809752, 'BATCH_SIZE': 245, 'GAMMA': 0.8658029093850939, 'TAU': 0.005147773000194972, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


 New best model saved! Score: 1.14
Episode 999	Average Score: 1.06cuda
cuda
Checkpoint saved at 2025-03-03 10:14:58
Episode 499	Average Score: -1.77Checkpoint saved at 2025-03-03 10:15:48
Episode 778	Average Score: 0.96 New best model saved! Score: 1.01
Episode 783	Average Score: 0.97 New best model saved! Score: 1.01
Episode 792	Average Score: 1.00 New best model saved! Score: 1.03
Episode 793	Average Score: 1.03 New best model saved! Score: 1.05
Episode 794	Average Score: 1.05 New best model saved! Score: 1.06
Episode 795	Average Score: 1.06 New best model saved! Score: 1.07
Episode 796	Average Score: 1.07 New best model saved! Score: 1.09
Episode 798	Average Score: 1.08 New best model saved! Score: 1.10
Episode 800	Average Score: 1.09 New best model saved! Score: 1.18
Episode 801	Average Score: 1.18 New best model saved! Score: 1.20
Episode 803	Average Score: 1.17 New best model saved! Score: 1.21
Episode 821	Average Score: 1.20 New best model saved! Score: 1.21
Episode 823	Average 

[I 2025-03-03 10:16:43,389] Trial 72 finished with value: 1.677999999999999 and parameters: {'BUFFER_SIZE': 826667, 'BATCH_SIZE': 223, 'GAMMA': 0.8716030918218789, 'TAU': 0.00460291342961143, 'UPDATE_EVERY': 4}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.68cuda
cuda
Checkpoint saved at 2025-03-03 10:16:43
Episode 499	Average Score: -1.71Checkpoint saved at 2025-03-03 10:17:36
Episode 807	Average Score: 1.000 New best model saved! Score: 1.02
Episode 808	Average Score: 1.02 New best model saved! Score: 1.03
Episode 809	Average Score: 1.03 New best model saved! Score: 1.08
Episode 810	Average Score: 1.08 New best model saved! Score: 1.09
Episode 811	Average Score: 1.09 New best model saved! Score: 1.09
Episode 813	Average Score: 1.09 New best model saved! Score: 1.15
Episode 814	Average Score: 1.15 New best model saved! Score: 1.15
Episode 815	Average Score: 1.15 New best model saved! Score: 1.24
Episode 845	Average Score: 1.21 New best model saved! Score: 1.28
Episode 846	Average Score: 1.28 New best model saved! Score: 1.32
Episode 847	Average Score: 1.32 New best model saved! Score: 1.34
Episode 848	Average Score: 1.34 New best model saved! Score: 1.37
Episode 849	Average Score: 1.37 New best model saved! 

[I 2025-03-03 10:18:37,516] Trial 73 finished with value: 2.2909999999999977 and parameters: {'BUFFER_SIZE': 904500, 'BATCH_SIZE': 218, 'GAMMA': 0.8618032800384808, 'TAU': 0.004990960912471859, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 2.29cuda
cuda
Checkpoint saved at 2025-03-03 10:18:37
Episode 499	Average Score: -1.68Checkpoint saved at 2025-03-03 10:19:32
Episode 658	Average Score: 0.973 New best model saved! Score: 1.02
Episode 660	Average Score: 1.00 New best model saved! Score: 1.02
Episode 689	Average Score: 0.99 New best model saved! Score: 1.02
Episode 692	Average Score: 1.02 New best model saved! Score: 1.03
Episode 693	Average Score: 1.03 New best model saved! Score: 1.03
Episode 694	Average Score: 1.03 New best model saved! Score: 1.05
Episode 704	Average Score: 1.04 New best model saved! Score: 1.07
Episode 706	Average Score: 1.04 New best model saved! Score: 1.08
Episode 708	Average Score: 1.06 New best model saved! Score: 1.08
Episode 709	Average Score: 1.08 New best model saved! Score: 1.11
Episode 710	Average Score: 1.11 New best model saved! Score: 1.14
Episode 711	Average Score: 1.14 New best model saved! Score: 1.16
Episode 712	Average Score: 1.16 New best model saved! 

[I 2025-03-03 10:20:46,252] Trial 74 finished with value: 2.552999999999994 and parameters: {'BUFFER_SIZE': 782923, 'BATCH_SIZE': 255, 'GAMMA': 0.8805537293666984, 'TAU': 0.005324295529247756, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 2.55cuda
cuda
Checkpoint saved at 2025-03-03 10:20:46
Episode 499	Average Score: -2.15Checkpoint saved at 2025-03-03 10:21:36
Episode 805	Average Score: 0.961 New best model saved! Score: 1.00
Episode 806	Average Score: 1.00 New best model saved! Score: 1.01
Episode 809	Average Score: 1.01 New best model saved! Score: 1.01
Episode 810	Average Score: 1.01 New best model saved! Score: 1.03
Episode 811	Average Score: 1.03 New best model saved! Score: 1.04
Episode 865	Average Score: 1.02 New best model saved! Score: 1.04
Episode 866	Average Score: 1.04 New best model saved! Score: 1.06
Episode 867	Average Score: 1.06 New best model saved! Score: 1.12
Episode 869	Average Score: 1.12 New best model saved! Score: 1.15
Episode 871	Average Score: 1.14 New best model saved! Score: 1.16
Episode 872	Average Score: 1.16 New best model saved! Score: 1.18
Episode 880	Average Score: 1.17 New best model saved! Score: 1.20
Episode 881	Average Score: 1.20 New best model saved! 

[I 2025-03-03 10:22:28,925] Trial 75 finished with value: 1.2670000000000001 and parameters: {'BUFFER_SIZE': 772126, 'BATCH_SIZE': 255, 'GAMMA': 0.8774215855443811, 'TAU': 0.005963696279399624, 'UPDATE_EVERY': 4}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.27cuda
cuda
Checkpoint saved at 2025-03-03 10:22:29
Episode 499	Average Score: -0.67Checkpoint saved at 2025-03-03 10:23:32
Episode 762	Average Score: 0.961 New best model saved! Score: 1.00
Episode 770	Average Score: 0.98 New best model saved! Score: 1.02
Episode 771	Average Score: 1.02 New best model saved! Score: 1.05
Episode 774	Average Score: 1.04 New best model saved! Score: 1.05
Episode 776	Average Score: 1.05 New best model saved! Score: 1.07
Episode 779	Average Score: 1.05 New best model saved! Score: 1.07
Episode 780	Average Score: 1.07 New best model saved! Score: 1.11
Episode 785	Average Score: 1.11 New best model saved! Score: 1.12
Episode 786	Average Score: 1.12 New best model saved! Score: 1.12
Episode 787	Average Score: 1.12 New best model saved! Score: 1.14
Episode 788	Average Score: 1.14 New best model saved! Score: 1.17
Episode 794	Average Score: 1.16 New best model saved! Score: 1.20
Episode 798	Average Score: 1.19 New best model saved! 

[I 2025-03-03 10:24:42,307] Trial 76 finished with value: 1.524 and parameters: {'BUFFER_SIZE': 697851, 'BATCH_SIZE': 236, 'GAMMA': 0.8841085180635703, 'TAU': 0.005678173490250432, 'UPDATE_EVERY': 2}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.52cuda
cuda
Checkpoint saved at 2025-03-03 10:24:42
Episode 499	Average Score: -1.75Checkpoint saved at 2025-03-03 10:25:37
Episode 998	Average Score: 0.588

[I 2025-03-03 10:26:33,227] Trial 77 finished with value: 0.5750000000000006 and parameters: {'BUFFER_SIZE': 648087, 'BATCH_SIZE': 247, 'GAMMA': 0.9099307033684072, 'TAU': 0.00633294480982453, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 0.58cuda
cuda
Checkpoint saved at 2025-03-03 10:26:33
Episode 499	Average Score: -0.28Checkpoint saved at 2025-03-03 10:27:33
Episode 997	Average Score: 0.860

[I 2025-03-03 10:28:46,727] Trial 78 finished with value: 0.8759999999999993 and parameters: {'BUFFER_SIZE': 743436, 'BATCH_SIZE': 181, 'GAMMA': 0.8802653472392988, 'TAU': 0.0043655138309910595, 'UPDATE_EVERY': 2}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 0.88cuda
cuda
Checkpoint saved at 2025-03-03 10:28:46
Episode 499	Average Score: -2.22Checkpoint saved at 2025-03-03 10:29:31
Episode 998	Average Score: -0.22

[I 2025-03-03 10:30:19,242] Trial 79 finished with value: -0.10499999999999941 and parameters: {'BUFFER_SIZE': 862965, 'BATCH_SIZE': 139, 'GAMMA': 0.8932792886718057, 'TAU': 0.005337841013274943, 'UPDATE_EVERY': 4}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: -0.10cuda
cuda
Checkpoint saved at 2025-03-03 10:30:19
Episode 499	Average Score: -1.99Checkpoint saved at 2025-03-03 10:31:02
Episode 878	Average Score: 0.93 New best model saved! Score: 1.03
Episode 879	Average Score: 1.03 New best model saved! Score: 1.04
Episode 890	Average Score: 1.04 New best model saved! Score: 1.06
Episode 894	Average Score: 1.03 New best model saved! Score: 1.14
Episode 895	Average Score: 1.14 New best model saved! Score: 1.15
Episode 896	Average Score: 1.15 New best model saved! Score: 1.17
Episode 905	Average Score: 1.13 New best model saved! Score: 1.24
Episode 924	Average Score: 1.20 New best model saved! Score: 1.32
Episode 997	Average Score: 0.62

[I 2025-03-03 10:31:48,937] Trial 80 finished with value: 0.6120000000000002 and parameters: {'BUFFER_SIZE': 931467, 'BATCH_SIZE': 254, 'GAMMA': 0.9011535826726724, 'TAU': 0.003297677081454912, 'UPDATE_EVERY': 7}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 0.61cuda
cuda
Checkpoint saved at 2025-03-03 10:31:49
Episode 499	Average Score: -1.41Checkpoint saved at 2025-03-03 10:32:43
Episode 998	Average Score: 0.331

[I 2025-03-03 10:33:46,031] Trial 81 finished with value: 0.3180000000000006 and parameters: {'BUFFER_SIZE': 798993, 'BATCH_SIZE': 242, 'GAMMA': 0.8681881373186916, 'TAU': 0.005286543947947085, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 0.32cuda
cuda
Checkpoint saved at 2025-03-03 10:33:46
Episode 499	Average Score: -1.03Checkpoint saved at 2025-03-03 10:34:39
Episode 758	Average Score: 0.972 New best model saved! Score: 1.00
Episode 759	Average Score: 1.00 New best model saved! Score: 1.01
Episode 773	Average Score: 0.97 New best model saved! Score: 1.02
Episode 774	Average Score: 1.02 New best model saved! Score: 1.05
Episode 775	Average Score: 1.05 New best model saved! Score: 1.05
Episode 787	Average Score: 1.05 New best model saved! Score: 1.08
Episode 789	Average Score: 1.08 New best model saved! Score: 1.09
Episode 791	Average Score: 1.09 New best model saved! Score: 1.13
Episode 792	Average Score: 1.13 New best model saved! Score: 1.17
Episode 793	Average Score: 1.17 New best model saved! Score: 1.17
Episode 794	Average Score: 1.17 New best model saved! Score: 1.17
Episode 795	Average Score: 1.17 New best model saved! Score: 1.19
Episode 796	Average Score: 1.19 New best model saved! 

[I 2025-03-03 10:35:39,753] Trial 82 finished with value: 1.6849999999999983 and parameters: {'BUFFER_SIZE': 847827, 'BATCH_SIZE': 236, 'GAMMA': 0.8722226095226487, 'TAU': 0.004956931616300751, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.68cuda
cuda
Checkpoint saved at 2025-03-03 10:35:39
Episode 499	Average Score: -2.19Checkpoint saved at 2025-03-03 10:36:34
Episode 890	Average Score: 0.973 New best model saved! Score: 1.01
Episode 891	Average Score: 1.01 New best model saved! Score: 1.03
Episode 908	Average Score: 1.02 New best model saved! Score: 1.04
Episode 909	Average Score: 1.04 New best model saved! Score: 1.04
Episode 910	Average Score: 1.04 New best model saved! Score: 1.06
Episode 911	Average Score: 1.06 New best model saved! Score: 1.09
Episode 913	Average Score: 1.09 New best model saved! Score: 1.13
Episode 914	Average Score: 1.13 New best model saved! Score: 1.14
Episode 916	Average Score: 1.13 New best model saved! Score: 1.14
Episode 921	Average Score: 1.13 New best model saved! Score: 1.17
Episode 929	Average Score: 1.16 New best model saved! Score: 1.18
Episode 939	Average Score: 1.14 New best model saved! Score: 1.21
Episode 940	Average Score: 1.21 New best model saved! 

[I 2025-03-03 10:37:35,405] Trial 83 finished with value: 1.679999999999999 and parameters: {'BUFFER_SIZE': 781793, 'BATCH_SIZE': 247, 'GAMMA': 0.8857870473467438, 'TAU': 0.0060101651625316475, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.68cuda
cuda
Checkpoint saved at 2025-03-03 10:37:35
Episode 499	Average Score: -0.89Checkpoint saved at 2025-03-03 10:38:40
Episode 740	Average Score: 0.991 New best model saved! Score: 1.03
Episode 745	Average Score: 1.02 New best model saved! Score: 1.04
Episode 749	Average Score: 1.03 New best model saved! Score: 1.07
Episode 750	Average Score: 1.07 New best model saved! Score: 1.08
Episode 751	Average Score: 1.08 New best model saved! Score: 1.10
Episode 752	Average Score: 1.10 New best model saved! Score: 1.14
Episode 773	Average Score: 1.13 New best model saved! Score: 1.15
Episode 781	Average Score: 1.12 New best model saved! Score: 1.15
Episode 784	Average Score: 1.14 New best model saved! Score: 1.16
Episode 785	Average Score: 1.16 New best model saved! Score: 1.16
Episode 810	Average Score: 1.15 New best model saved! Score: 1.18
Episode 823	Average Score: 1.17 New best model saved! Score: 1.21
Episode 828	Average Score: 1.20 New best model saved! 

[I 2025-03-03 10:39:54,243] Trial 84 finished with value: 1.5189999999999995 and parameters: {'BUFFER_SIZE': 827434, 'BATCH_SIZE': 219, 'GAMMA': 0.859464720118632, 'TAU': 0.004661294717880948, 'UPDATE_EVERY': 2}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.52cuda
cuda
Checkpoint saved at 2025-03-03 10:39:54
Episode 499	Average Score: -1.55Checkpoint saved at 2025-03-03 10:40:44
Episode 799	Average Score: 0.99 New best model saved! Score: 1.03
Episode 809	Average Score: 0.93 New best model saved! Score: 1.03
Episode 814	Average Score: 1.00 New best model saved! Score: 1.08
Episode 875	Average Score: 1.06 New best model saved! Score: 1.10
Episode 878	Average Score: 1.09 New best model saved! Score: 1.14
Episode 879	Average Score: 1.14 New best model saved! Score: 1.16
Episode 880	Average Score: 1.16 New best model saved! Score: 1.19
Episode 881	Average Score: 1.19 New best model saved! Score: 1.23
Episode 882	Average Score: 1.23 New best model saved! Score: 1.36
Episode 884	Average Score: 1.35 New best model saved! Score: 1.38
Episode 885	Average Score: 1.38 New best model saved! Score: 1.38
Episode 886	Average Score: 1.38 New best model saved! Score: 1.39
Episode 888	Average Score: 1.39 New best model saved! S

[I 2025-03-03 10:41:46,847] Trial 85 finished with value: 1.7749999999999964 and parameters: {'BUFFER_SIZE': 731029, 'BATCH_SIZE': 250, 'GAMMA': 0.8794091162274714, 'TAU': 0.006521465278523027, 'UPDATE_EVERY': 4}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.77cuda
cuda
Checkpoint saved at 2025-03-03 10:41:46
Episode 499	Average Score: -1.74Checkpoint saved at 2025-03-03 10:42:36
Episode 997	Average Score: 0.712

[I 2025-03-03 10:43:28,550] Trial 86 finished with value: 0.7080000000000001 and parameters: {'BUFFER_SIZE': 891931, 'BATCH_SIZE': 130, 'GAMMA': 0.8886562449331121, 'TAU': 0.005433509268534706, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 0.71cuda
cuda
Checkpoint saved at 2025-03-03 10:43:28
Episode 499	Average Score: -1.78Checkpoint saved at 2025-03-03 10:44:25
Episode 892	Average Score: 0.92 New best model saved! Score: 1.04
Episode 893	Average Score: 1.04 New best model saved! Score: 1.09
Episode 894	Average Score: 1.09 New best model saved! Score: 1.11
Episode 902	Average Score: 1.05 New best model saved! Score: 1.11
Episode 918	Average Score: 1.06 New best model saved! Score: 1.13
Episode 919	Average Score: 1.13 New best model saved! Score: 1.16
Episode 920	Average Score: 1.16 New best model saved! Score: 1.17
Episode 921	Average Score: 1.17 New best model saved! Score: 1.20
Episode 923	Average Score: 1.20 New best model saved! Score: 1.21
Episode 924	Average Score: 1.21 New best model saved! Score: 1.27
Episode 930	Average Score: 1.27 New best model saved! Score: 1.33
Episode 932	Average Score: 1.33 New best model saved! Score: 1.36
Episode 936	Average Score: 1.36 New best model saved! S

[I 2025-03-03 10:45:30,482] Trial 87 finished with value: 1.7949999999999986 and parameters: {'BUFFER_SIZE': 951187, 'BATCH_SIZE': 123, 'GAMMA': 0.8488337319554966, 'TAU': 0.005825875702885838, 'UPDATE_EVERY': 2}. Best is trial 61 with value: 5.441999999999986.


 New best model saved! Score: 1.76
Episode 998	Average Score: 1.76 New best model saved! Score: 1.79
Episode 999	Average Score: 1.79cuda
cuda
Checkpoint saved at 2025-03-03 10:45:30
Episode 499	Average Score: -1.16Checkpoint saved at 2025-03-03 10:46:25
Episode 736	Average Score: 0.981 New best model saved! Score: 1.00
Episode 759	Average Score: 0.99 New best model saved! Score: 1.02
Episode 766	Average Score: 1.01 New best model saved! Score: 1.04
Episode 771	Average Score: 1.01 New best model saved! Score: 1.05
Episode 773	Average Score: 1.05 New best model saved! Score: 1.07
Episode 774	Average Score: 1.07 New best model saved! Score: 1.08
Episode 775	Average Score: 1.08 New best model saved! Score: 1.12
Episode 776	Average Score: 1.12 New best model saved! Score: 1.15
Episode 878	Average Score: 1.14 New best model saved! Score: 1.15
Episode 888	Average Score: 1.12 New best model saved! Score: 1.18
Episode 929	Average Score: 1.17 New best model saved! Score: 1.18
Episode 930	Average

[I 2025-03-03 10:47:25,653] Trial 88 finished with value: 1.3019999999999998 and parameters: {'BUFFER_SIZE': 764135, 'BATCH_SIZE': 109, 'GAMMA': 0.8637500032374424, 'TAU': 0.006208074123864548, 'UPDATE_EVERY': 2}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.30cuda
cuda
Checkpoint saved at 2025-03-03 10:47:25
Episode 499	Average Score: -1.73Checkpoint saved at 2025-03-03 10:48:11
Episode 997	Average Score: 0.997

[I 2025-03-03 10:48:59,522] Trial 89 finished with value: 0.9880000000000001 and parameters: {'BUFFER_SIZE': 702616, 'BATCH_SIZE': 167, 'GAMMA': 0.8711682640895287, 'TAU': 0.005206728562071527, 'UPDATE_EVERY': 4}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 0.99cuda
cuda
Checkpoint saved at 2025-03-03 10:48:59
Episode 499	Average Score: -1.74Checkpoint saved at 2025-03-03 10:49:41
Episode 998	Average Score: -1.41

[I 2025-03-03 10:50:26,172] Trial 90 finished with value: -1.3239999999999992 and parameters: {'BUFFER_SIZE': 237587, 'BATCH_SIZE': 211, 'GAMMA': 0.9850522881953342, 'TAU': 0.004152673690133054, 'UPDATE_EVERY': 6}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: -1.32cuda
cuda
Checkpoint saved at 2025-03-03 10:50:26
Episode 499	Average Score: -1.53Checkpoint saved at 2025-03-03 10:51:18
Episode 844	Average Score: 0.992 New best model saved! Score: 1.02
Episode 845	Average Score: 1.02 New best model saved! Score: 1.03
Episode 846	Average Score: 1.03 New best model saved! Score: 1.04
Episode 847	Average Score: 1.04 New best model saved! Score: 1.07
Episode 851	Average Score: 1.04 New best model saved! Score: 1.08
Episode 852	Average Score: 1.08 New best model saved! Score: 1.10
Episode 853	Average Score: 1.10 New best model saved! Score: 1.13
Episode 854	Average Score: 1.13 New best model saved! Score: 1.16
Episode 856	Average Score: 1.16 New best model saved! Score: 1.16
Episode 873	Average Score: 1.14 New best model saved! Score: 1.16
Episode 875	Average Score: 1.16 New best model saved! Score: 1.19
Episode 876	Average Score: 1.19 New best model saved! Score: 1.22
Episode 877	Average Score: 1.22 New best model saved!

[I 2025-03-03 10:52:15,334] Trial 91 finished with value: 1.0870000000000002 and parameters: {'BUFFER_SIZE': 898092, 'BATCH_SIZE': 197, 'GAMMA': 0.8618212713206934, 'TAU': 0.004972653009444643, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.09cuda
cuda
Checkpoint saved at 2025-03-03 10:52:15
Episode 499	Average Score: -2.45Checkpoint saved at 2025-03-03 10:53:08
Episode 889	Average Score: 0.962 New best model saved! Score: 1.01
Episode 895	Average Score: 0.99 New best model saved! Score: 1.01
Episode 896	Average Score: 1.01 New best model saved! Score: 1.02
Episode 898	Average Score: 1.01 New best model saved! Score: 1.04
Episode 899	Average Score: 1.04 New best model saved! Score: 1.09
Episode 900	Average Score: 1.09 New best model saved! Score: 1.12
Episode 901	Average Score: 1.12 New best model saved! Score: 1.15
Episode 902	Average Score: 1.15 New best model saved! Score: 1.19
Episode 903	Average Score: 1.19 New best model saved! Score: 1.21
Episode 904	Average Score: 1.21 New best model saved! Score: 1.21
Episode 905	Average Score: 1.21 New best model saved! Score: 1.24
Episode 906	Average Score: 1.24 New best model saved! Score: 1.27
Episode 907	Average Score: 1.27 New best model saved! 

[I 2025-03-03 10:54:07,326] Trial 92 finished with value: 2.000999999999999 and parameters: {'BUFFER_SIZE': 917055, 'BATCH_SIZE': 222, 'GAMMA': 0.8529810715998091, 'TAU': 0.0038977039132483864, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


 New best model saved! Score: 2.00
Episode 999	Average Score: 2.00cuda
cuda
Checkpoint saved at 2025-03-03 10:54:07
Episode 499	Average Score: -1.55Checkpoint saved at 2025-03-03 10:55:00
Episode 702	Average Score: 0.930 New best model saved! Score: 1.02
Episode 703	Average Score: 1.02 New best model saved! Score: 1.03
Episode 704	Average Score: 1.03 New best model saved! Score: 1.03
Episode 707	Average Score: 1.02 New best model saved! Score: 1.07
Episode 708	Average Score: 1.07 New best model saved! Score: 1.10
Episode 709	Average Score: 1.10 New best model saved! Score: 1.12
Episode 711	Average Score: 1.11 New best model saved! Score: 1.13
Episode 776	Average Score: 1.01 New best model saved! Score: 1.13
Episode 781	Average Score: 1.11 New best model saved! Score: 1.14
Episode 823	Average Score: 1.12 New best model saved! Score: 1.15
Episode 824	Average Score: 1.15 New best model saved! Score: 1.16
Episode 836	Average Score: 1.11 New best model saved! Score: 1.20
Episode 837	Average

[I 2025-03-03 10:55:59,829] Trial 93 finished with value: 2.0529999999999986 and parameters: {'BUFFER_SIZE': 841336, 'BATCH_SIZE': 217, 'GAMMA': 0.8436199219767504, 'TAU': 0.00493229575857693, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 2.05cuda
cuda
Checkpoint saved at 2025-03-03 10:55:59
Episode 499	Average Score: -1.71Checkpoint saved at 2025-03-03 10:56:53
Episode 672	Average Score: 0.975 New best model saved! Score: 1.00
Episode 700	Average Score: 0.97 New best model saved! Score: 1.01
Episode 707	Average Score: 0.99 New best model saved! Score: 1.04
Episode 710	Average Score: 1.03 New best model saved! Score: 1.06
Episode 711	Average Score: 1.06 New best model saved! Score: 1.07
Episode 712	Average Score: 1.07 New best model saved! Score: 1.09
Episode 713	Average Score: 1.09 New best model saved! Score: 1.10
Episode 715	Average Score: 1.09 New best model saved! Score: 1.10
Episode 716	Average Score: 1.10 New best model saved! Score: 1.12
Episode 717	Average Score: 1.12 New best model saved! Score: 1.13
Episode 718	Average Score: 1.13 New best model saved! Score: 1.14
Episode 720	Average Score: 1.13 New best model saved! Score: 1.15
Episode 722	Average Score: 1.14 New best model saved! 

[I 2025-03-03 10:58:06,500] Trial 94 finished with value: 1.431999999999996 and parameters: {'BUFFER_SIZE': 967312, 'BATCH_SIZE': 228, 'GAMMA': 0.880696858212107, 'TAU': 0.0055976909961257485, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.43cuda
cuda
Checkpoint saved at 2025-03-03 10:58:06
Episode 499	Average Score: -1.67Checkpoint saved at 2025-03-03 10:59:00
Episode 997	Average Score: 0.161

[I 2025-03-03 10:59:59,450] Trial 95 finished with value: 0.2080000000000006 and parameters: {'BUFFER_SIZE': 809189, 'BATCH_SIZE': 233, 'GAMMA': 0.8761523946490147, 'TAU': 0.004671109072784272, 'UPDATE_EVERY': 3}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 0.21cuda
cuda
Checkpoint saved at 2025-03-03 10:59:59
Episode 499	Average Score: -0.58Checkpoint saved at 2025-03-03 11:01:04
Episode 765	Average Score: 1.000 New best model saved! Score: 1.06
Episode 805	Average Score: 1.04 New best model saved! Score: 1.07
Episode 806	Average Score: 1.07 New best model saved! Score: 1.09
Episode 807	Average Score: 1.09 New best model saved! Score: 1.09
Episode 808	Average Score: 1.09 New best model saved! Score: 1.12
Episode 823	Average Score: 1.10 New best model saved! Score: 1.14
Episode 827	Average Score: 1.14 New best model saved! Score: 1.18
Episode 828	Average Score: 1.18 New best model saved! Score: 1.18
Episode 846	Average Score: 1.17 New best model saved! Score: 1.21
Episode 848	Average Score: 1.21 New best model saved! Score: 1.21
Episode 854	Average Score: 1.19 New best model saved! Score: 1.23
Episode 855	Average Score: 1.23 New best model saved! Score: 1.26
Episode 863	Average Score: 1.24 New best model saved! 

[I 2025-03-03 11:02:19,090] Trial 96 finished with value: 1.6669999999999994 and parameters: {'BUFFER_SIZE': 888975, 'BATCH_SIZE': 243, 'GAMMA': 0.8583264387057585, 'TAU': 0.005379924019809608, 'UPDATE_EVERY': 2}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.67cuda
cuda
Checkpoint saved at 2025-03-03 11:02:19
Episode 499	Average Score: 0.00Checkpoint saved at 2025-03-03 11:03:56
Episode 678	Average Score: 1.004 New best model saved! Score: 1.02
Episode 679	Average Score: 1.02 New best model saved! Score: 1.08
Episode 680	Average Score: 1.08 New best model saved! Score: 1.12
Episode 691	Average Score: 1.11 New best model saved! Score: 1.15
Episode 693	Average Score: 1.15 New best model saved! Score: 1.19
Episode 695	Average Score: 1.17 New best model saved! Score: 1.20
Episode 696	Average Score: 1.20 New best model saved! Score: 1.21
Episode 698	Average Score: 1.20 New best model saved! Score: 1.26
Episode 699	Average Score: 1.26 New best model saved! Score: 1.27
Episode 700	Average Score: 1.27 New best model saved! Score: 1.29
Episode 701	Average Score: 1.29 New best model saved! Score: 1.32
Episode 707	Average Score: 1.31 New best model saved! Score: 1.33
Episode 710	Average Score: 1.33 New best model saved! S

[I 2025-03-03 11:05:45,036] Trial 97 finished with value: 2.1249999999999987 and parameters: {'BUFFER_SIZE': 874970, 'BATCH_SIZE': 240, 'GAMMA': 0.8701006978030417, 'TAU': 0.0050531681749664695, 'UPDATE_EVERY': 1}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 2.12cuda
cuda
Checkpoint saved at 2025-03-03 11:05:45
Episode 499	Average Score: -2.36Checkpoint saved at 2025-03-03 11:06:29
Episode 916	Average Score: 0.90 New best model saved! Score: 1.02
Episode 917	Average Score: 1.02 New best model saved! Score: 1.06
Episode 920	Average Score: 1.01 New best model saved! Score: 1.09
Episode 922	Average Score: 1.09 New best model saved! Score: 1.13
Episode 927	Average Score: 1.11 New best model saved! Score: 1.18
Episode 929	Average Score: 1.18 New best model saved! Score: 1.21
Episode 930	Average Score: 1.21 New best model saved! Score: 1.24
Episode 931	Average Score: 1.24 New best model saved! Score: 1.25
Episode 932	Average Score: 1.25 New best model saved! Score: 1.28
Episode 933	Average Score: 1.28 New best model saved! Score: 1.29
Episode 940	Average Score: 1.27 New best model saved! Score: 1.33
Episode 941	Average Score: 1.33 New best model saved! Score: 1.38
Episode 942	Average Score: 1.38 New best model saved! S

[I 2025-03-03 11:07:16,466] Trial 98 finished with value: 1.7899999999999994 and parameters: {'BUFFER_SIZE': 742233, 'BATCH_SIZE': 118, 'GAMMA': 0.8657794121446323, 'TAU': 0.004321547052141618, 'UPDATE_EVERY': 4}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.79cuda
cuda
Checkpoint saved at 2025-03-03 11:07:16
Episode 499	Average Score: -1.30Checkpoint saved at 2025-03-03 11:08:34
Episode 691	Average Score: 0.971 New best model saved! Score: 1.00
Episode 692	Average Score: 1.00 New best model saved! Score: 1.02
Episode 693	Average Score: 1.02 New best model saved! Score: 1.04
Episode 694	Average Score: 1.04 New best model saved! Score: 1.05
Episode 695	Average Score: 1.05 New best model saved! Score: 1.07
Episode 696	Average Score: 1.07 New best model saved! Score: 1.11
Episode 697	Average Score: 1.11 New best model saved! Score: 1.13
Episode 698	Average Score: 1.13 New best model saved! Score: 1.14
Episode 700	Average Score: 1.13 New best model saved! Score: 1.17
Episode 702	Average Score: 1.17 New best model saved! Score: 1.18
Episode 703	Average Score: 1.18 New best model saved! Score: 1.19
Episode 705	Average Score: 1.18 New best model saved! Score: 1.24
Episode 706	Average Score: 1.24 New best model saved! 

[I 2025-03-03 11:10:03,247] Trial 99 finished with value: 1.955999999999999 and parameters: {'BUFFER_SIZE': 481379, 'BATCH_SIZE': 101, 'GAMMA': 0.8936556851531223, 'TAU': 0.006086884306415569, 'UPDATE_EVERY': 1}. Best is trial 61 with value: 5.441999999999986.


Episode 999	Average Score: 1.96Best hyperparameters:  {'BUFFER_SIZE': 757624, 'BATCH_SIZE': 228, 'GAMMA': 0.8777525034865641, 'TAU': 0.005566248249524419, 'UPDATE_EVERY': 3}


In [9]:
# Print the best hyperparameter set recorded on `study`.
# NOTE(review): `study` is defined in an earlier cell that is not visible here —
# presumably the Optuna study whose trial log is shown in the previous cell's
# output; this cell only works after that search cell has run in the same kernel.
print(study.best_params)

{'BUFFER_SIZE': 757624, 'BATCH_SIZE': 228, 'GAMMA': 0.8777525034865641, 'TAU': 0.005566248249524419, 'UPDATE_EVERY': 3}
