In [1]:
import os
import re
import time
from datetime import datetime
from typing import Any
from typing import Dict
from typing import List
from typing import Optional

import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
from gymnasium.wrappers import AtariPreprocessing, FrameStack, RecordEpisodeStatistics, RecordVideo
from stable_baselines3.common.buffers import ReplayBuffer
from stable_baselines3.common.save_util import load_from_pkl, save_to_pkl
from torch.utils.tensorboard import SummaryWriter

from common.swin_transformer.models.swin_transformer import SwinTransformer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Tutel has not been installed. To use Swin-MoE, please install Tutel; otherwise, just ignore this.


In [2]:
def step_trigger(step: int):
    """
    Tell whether ``step`` falls on a 400,000-step boundary.

    :param step: the step counter to test
    :return: ``True`` when ``step`` is an exact multiple of 400,000 (including 0), ``False`` otherwise
    """
    remainder = step % 400_000
    return remainder == 0

def make_env(env_name="ALE/Pong-v5", seed=42):
    """
    Build the wrapped environment used for training.

    The base environment is wrapped, from innermost to outermost, with
    ``AtariPreprocessing`` (default settings), ``FrameStack`` (4 frames),
    ``RecordEpisodeStatistics`` and ``RecordVideo``.

    Only the observation and action *spaces* are seeded here; the environment
    itself is not reset with a seed by this function.

    :param env_name: the Gymnasium environment id to create
    :param seed: the seed applied to the observation and action spaces
    :return: the fully wrapped environment (outermost wrapper is ``RecordVideo``)
    """
    atari_env = gym.make(env_name, render_mode="rgb_array", full_action_space=False, frameskip=1)
    stacked_env = FrameStack(AtariPreprocessing(atari_env), 4)

    # A video will be recorded every 400,000 steps.
    recorded_env = RecordVideo(
        RecordEpisodeStatistics(stacked_env),
        "runs/videos/",
        step_trigger=step_trigger,
        video_length=1000,
    )

    for space in (recorded_env.observation_space, recorded_env.action_space):
        space.seed(seed)

    return recorded_env

In [3]:
class SwinDQN(nn.Module):
    """
    Q-network backed by a Swin Transformer.

    The transformer is configured for 84x84 inputs and its classification
    head is sized to ``num_actions``, so the forward pass returns one value
    per action.
    """

    def __init__(self, num_channels, num_actions):
        """
        :param num_channels: number of input channels (passed as ``in_chans``)
        :param num_actions: number of outputs (passed as ``num_classes``)
        """
        super().__init__()

        swin_config = dict(
            img_size=84,
            in_chans=num_channels,
            num_classes=num_actions,
            depths=[2, 3, 2],
            num_heads=[3, 3, 6],
            patch_size=3,
            window_size=7,
            embed_dim=96,
            mlp_ratio=4,
            drop_path_rate=0.1,
        )
        self.swin = SwinTransformer(**swin_config)

    def forward(self, x):
        """
        Run the network on a batch of observations.

        :param x: input tensor; it is cast to float and divided by 255 before being fed to the transformer
        :return: the output of the Swin Transformer for the rescaled input
        """
        # Rescale input from [0, 255] to [0, 1]
        rescaled = x.float() / 255
        return self.swin(rescaled)

# Instantiate the network with num_channels=4 and num_actions=6; the bare
# name on the last line makes the notebook display the module's repr.
swin = SwinDQN(4, 6)
swin

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


SwinDQN(
  (swin): SwinTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(4, 96, kernel_size=(3, 3), stride=(3, 3))
      (norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0): BasicLayer(
        dim=96, input_resolution=(28, 28), depth=2
        (blocks): ModuleList(
          (0): SwinTransformerBlock(
            dim=96, input_resolution=(28, 28), num_heads=3, window_size=7, shift_size=0, mlp_ratio=4
            (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
            (attn): WindowAttention(
              dim=96, window_size=(7, 7), num_heads=3
              (qkv): Linear(in_features=96, out_features=288, bias=True)
              (attn_drop): Dropout(p=0.0, inplace=False)
              (proj): Linear(in_features=96, out_features=96, bias=True)
              (proj_drop): Dropout(p=0.0, inplace=False)
              (softmax): Softmax(dim=-1)
            )
   

- **policy network**: $Q^{A}_{\theta}$
- **target network**: $Q^{B}_{\theta}$

In [4]:
def linear_schedule(start_epsilon: float, end_epsilon: float, duration: int, timestep: int):
    """
    Linearly interpolate from ``start_epsilon`` to ``end_epsilon`` over ``duration`` steps.

    Once ``timestep`` reaches ``duration`` the value stays clamped at ``end_epsilon``.
    Both decreasing (``start > end``) and increasing (``start < end``) schedules are supported.

    :param start_epsilon: value returned at ``timestep == 0``
    :param end_epsilon: value the schedule converges to and never goes past
    :param duration: number of steps taken to go from start to end; a
        non-positive duration means the schedule is already finished
    :param timestep: the current step
    :return: the scheduled value for ``timestep``
    """
    # A zero-length (or negative) schedule has nothing to interpolate; this
    # also avoids a ZeroDivisionError below.
    if duration <= 0:
        return end_epsilon

    slope = (end_epsilon - start_epsilon) / duration
    value = slope * timestep + start_epsilon

    # Clamp on the side the schedule is moving towards.
    if slope <= 0:
        return max(value, end_epsilon)
    return min(value, end_epsilon)

In [5]:
def get_latest_checkpoint_file(files: List[str]) -> Optional[str]:
    """
    Return the most recent checkpoint file from the passed list of files.

    If multiple files with same datetime are passed, only the first is returned.
    Files whose name contains no datetime, or a datetime-shaped string that is
    not a real date (e.g. ``99-99-2023_10:00:00``), are ignored.

    :param files: list of file names containing a formatted datetime (=> %d-%m-%Y_%H:%M:%S)
    :return: the file with the most recent date time or ``None`` if no files were found (because of the lack of correctly formatted date in the file name)
    """
    datetime_regex = r"\d{2}-\d{2}-\d{4}_\d{2}:\d{2}:\d{2}"
    datetime_format = "%d-%m-%Y_%H:%M:%S"

    latest_file = None
    latest_datetime = None
    for file in files:
        match = re.search(datetime_regex, file)
        if not match:
            continue  # Go to next element in list if no match is found

        try:
            file_datetime = datetime.strptime(match.group(), datetime_format)
        except ValueError:
            # The regex only checks the digit layout; strptime rejects
            # impossible dates/times. Treat those like a missing datetime.
            continue

        # Strict ">" keeps the first file when several share the same datetime.
        if latest_datetime is None or file_datetime > latest_datetime:
            latest_datetime = file_datetime
            latest_file = file

    return latest_file


class DQNAgent:
    """
    Double-DQN agent with an epsilon-greedy policy and a replay buffer.

    Two ``SwinDQN`` networks are kept: the policy network, which is trained
    and used to pick actions, and the target network, which is periodically
    synchronised with the policy network and used to evaluate the action the
    policy network selects for the next state.

    The agent also handles TensorBoard logging, periodic checkpointing and
    resuming from the most recent checkpoint.
    """

    def __init__(self, name="CNN_DDQN_Pong-v5", env_name="ALE/Pong-v5"):
        """
        :param name: run name, used as prefix for the log, video and checkpoint directories
        :param env_name: Gymnasium environment id passed to ``make_env``
        """
        self.name = name

        self.env = make_env(env_name)

        # Both are set when training starts (or restored from a checkpoint).
        self.start_datetime = None
        self.start_time = None

        # I use the same hyperparameters as this model: https://huggingface.co/sb3/dqn-PongNoFrameskip-v4

        self.MAX_TIMESTEPS = 10_000_000  # Maximum number of total steps
        self.TARGET_UPDATE_INTERVAL = 1000  # Number of steps between the synchronisation of q and target network
        self.LEARNING_STARTS = 100_000  # The number of steps to wait before we start the training, so the agent can explore and store its experience in the replay buffer

        self.TRAIN_FREQUENCY = 4  # Training is done each 4 steps

        self.CHECKPOINT_INTERVAL_EPISODE = 500  # Checkpoint saving interval per episode (a checkpoint will be saved each X episodes)

        self.REPLAY_SIZE = 10_000
        self.BATCH_SIZE = 32

        self.GAMMA = 0.99  # Discount rate

        self.EXPLORATION_FRACTION = 0.1  # The fraction of 'MAX_TIMESTEPS' it takes to go from 'EPSILON_INITIAL' to 'EPSILON_FINAL'.
        self.EPSILON_INITIAL = 1.0
        self.EPSILON_FINAL = 0.01

        self.epsilon = self.EPSILON_INITIAL  # Exploration probability

        self.memory = ReplayBuffer(
            buffer_size=self.REPLAY_SIZE,
            observation_space=self.env.observation_space,
            action_space=self.env.action_space,
            device=device,
            optimize_memory_usage=True,
            handle_timeout_termination=False
        )

        self.timesteps = 0

        self.policy_network = SwinDQN(4, self.env.action_space.n).to(device)
        self.target_network = SwinDQN(4, self.env.action_space.n).to(device)
        self.target_network.load_state_dict(self.policy_network.state_dict())
        # The target network is never trained: keep it in eval mode so that
        # stochastic depth (drop_path_rate=0.1 in SwinDQN) does not add noise
        # to the TD targets. load_state_dict() does not change this mode.
        self.target_network.eval()

        self.optimizer = torch.optim.Adam(self.policy_network.parameters(), lr=0.0001)
        self.loss_fn = nn.SmoothL1Loss()

        # Metrics/Logs
        self.PATH = "runs"
        os.makedirs(self.PATH, exist_ok=True)

        self.CHECKPOINTS_PATH = f"{self.PATH}/checkpoints"
        self.LOGS_PATH = f"{self.PATH}/logs"
        self.VIDEO_PATH = f"{self.PATH}/videos"

        self.is_loaded_from_checkpoint = False
        self.writer = None

    def remember(self, observation, next_observation, action, reward, done, infos):
        """Store one transition in the replay buffer."""
        self.memory.add(observation, next_observation, action, reward, done, infos)

    def act(self, state):
        """
        Pick an action for ``state`` with an epsilon-greedy policy.

        Before ``LEARNING_STARTS`` steps every action is random. Afterwards
        epsilon is annealed linearly and the action is random with
        probability epsilon, greedy w.r.t. the policy network otherwise.

        :param state: the current observation
        :return: the chosen action as a 0-dimensional numpy array
        """
        # Reduce epsilon when learning started
        if self.timesteps >= self.LEARNING_STARTS:
            # Minus LEARNING_STARTS to take into account that learning only started after LEARNING_STARTS,
            # and so we want to start reducing epsilon only when learning starts
            self.epsilon = linear_schedule(
                self.EPSILON_INITIAL,
                self.EPSILON_FINAL,
                int(self.EXPLORATION_FRACTION * self.MAX_TIMESTEPS),
                self.timesteps - self.LEARNING_STARTS
            )

        if self.timesteps < self.LEARNING_STARTS or np.random.rand() < self.epsilon:
            # Random action
            return np.array(self.env.action_space.sample())
        else:
            with torch.no_grad():
                state_tensor = torch.tensor(np.array(state), device=device).unsqueeze(0)
                q_values = self.policy_network(state_tensor)
                return q_values.argmax(dim=1)[0].cpu().numpy()

    def update_target_network(self):
        """Copy the policy network weights into the target network."""
        self.target_network.load_state_dict(self.policy_network.state_dict())

    def optimize_model(self):
        """
        Run one Double-DQN gradient step on a minibatch sampled from the replay buffer.

        Every 100 timesteps the loss, mean Q-value and steps per second are
        written to TensorBoard (``self.writer`` must be set, which ``run`` does).
        """
        minibatch = self.memory.sample(self.BATCH_SIZE)

        # Calculate Q values for current states
        # For each q_values, get the action according to the minibatch
        q_values = self.policy_network(minibatch.observations).gather(1, minibatch.actions)

        # The whole target is a constant w.r.t. the optimisation: compute it
        # without autograd so no graph is built (and no gradient accumulated)
        # for the target network.
        with torch.no_grad():
            # Best actions for the next states according to the policy network
            best_next_actions = self.policy_network(minibatch.next_observations).argmax(1).unsqueeze(1)

            # Q values of those actions according to the target network
            target_next_q_values = self.target_network(minibatch.next_observations).gather(1, best_next_actions)

            # Calculate the target Q values using Double DQN
            target_q_values = minibatch.rewards + (1 - minibatch.dones) * self.GAMMA * target_next_q_values

        # Compute the loss
        loss = self.loss_fn(q_values, target_q_values)

        # Compute metrics for loss
        if self.timesteps % 100 == 0:
            self.writer.add_scalar("train/loss", loss.item(), self.timesteps)
            self.writer.add_scalar("train/q_values", q_values.squeeze().mean().item(), self.timesteps)
            steps_per_second = int(self.timesteps / (time.time() - self.start_time))
            self.writer.add_scalar("train/steps_per_second", steps_per_second, self.timesteps)

        # Optimise Q network
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def save_checkpoint(self):
        """
        Save the training state and the replay buffer.

        Both files are written to
        ``<CHECKPOINTS_PATH>/<name>_<start datetime>/`` and carry the current
        datetime in their name. Nothing is saved (an error is printed) when
        training has not started yet.
        """
        if self.start_datetime is None:
            print("SAVE_CHECKPOINT_ERROR: Training need to have started to save a checkpoint.")
            return

        print("Saving checkpoint...")
        current_datetime_str = datetime.now().strftime("%d-%m-%Y_%H:%M:%S")
        start_datetime_str = self.start_datetime.strftime("%d-%m-%Y_%H:%M:%S")

        save_parent_directory = f"{self.CHECKPOINTS_PATH}/{self.name}_{start_datetime_str}"
        save_path = save_parent_directory + "/chkpt_" + current_datetime_str + ".tar"
        replay_buffer_path = save_parent_directory + "/replay_buffer_" + current_datetime_str

        os.makedirs(save_parent_directory, exist_ok=True)

        checkpoint = {
            "env": self.env,
            "timesteps": self.timesteps,
            "start_datetime": self.start_datetime,
            "epsilon": self.epsilon,
            "policy_network": self.policy_network.state_dict(),
            "target_network": self.target_network.state_dict(),
            "optimizer": self.optimizer.state_dict(),
        }

        torch.save(checkpoint, save_path)
        # Saving the replay buffer will take time! But it is needed to properly resume training
        save_to_pkl(replay_buffer_path, self.memory, verbose=1)

        print(f"Checkpoint saved into {save_parent_directory}")

    def load_last_checkpoint(self, path):
        """
        Load the last saved checkpoint found in the given ``path``

        The agent is only marked as loaded from a checkpoint once everything
        has been restored successfully.

        NOTE: both the checkpoint and the replay buffer are unpickled, which
        can execute arbitrary code. Only load files you created yourself.

        :param path: the path to the directory containing the checkpoint(s)
        :raises FileNotFoundError: if ``path`` does not exist, or contains no
            checkpoint / replay buffer file with a valid datetime in its name
        """
        print(f"Loading most recent checkpoint from {path}")

        # Using list comprehension to filter directories and only get the files
        files = [file for file in os.listdir(path) if os.path.isfile(os.path.join(path, file))]

        checkpoint_files = [chkpt_file for chkpt_file in files if "chkpt" in chkpt_file]
        replay_buffer_files = [chkpt_file for chkpt_file in files if "replay_buffer" in chkpt_file]

        checkpoint_file = get_latest_checkpoint_file(checkpoint_files)
        replay_buffer_file = get_latest_checkpoint_file(replay_buffer_files)

        # Fail with an explicit message instead of a TypeError on `str + None`.
        if checkpoint_file is None:
            raise FileNotFoundError(f"No 'chkpt' file with a valid datetime found in {path}")
        if replay_buffer_file is None:
            raise FileNotFoundError(f"No 'replay_buffer' file with a valid datetime found in {path}")

        # map_location lets a checkpoint saved on GPU be resumed on a CPU-only machine (and vice versa).
        checkpoint: Dict[str, Any] = torch.load(os.path.join(path, checkpoint_file), map_location=device)

        self.env = checkpoint["env"]
        self.timesteps = checkpoint["timesteps"]
        self.start_datetime: datetime = checkpoint["start_datetime"]
        self.start_time = self.start_datetime.timestamp()

        self.epsilon = checkpoint["epsilon"]

        self.policy_network.load_state_dict(checkpoint["policy_network"])
        self.target_network.load_state_dict(checkpoint["target_network"])
        self.optimizer.load_state_dict(checkpoint["optimizer"])

        self.memory: ReplayBuffer = load_from_pkl(os.path.join(path, replay_buffer_file))

        self.is_loaded_from_checkpoint = True
        print("Checkpoint successfully loaded, you can resume the training now.")

    def run(self):
        """
        Train the agent until ``MAX_TIMESTEPS`` is reached.

        Starts a fresh run, or continues the one restored by
        ``load_last_checkpoint``. A checkpoint is saved every
        ``CHECKPOINT_INTERVAL_EPISODE`` episodes and once more at the end.
        The TensorBoard writer is always flushed and closed, even when
        training is interrupted or fails.
        """
        # Either start a new run or resume from the restored start datetime
        if not self.is_loaded_from_checkpoint:
            current_datetime = datetime.now()
            self.start_datetime = current_datetime
            self.start_time = current_datetime.timestamp()

        start_datetime_str = self.start_datetime.strftime("%d-%m-%Y_%H:%M:%S")
        self.writer = SummaryWriter(f"{self.LOGS_PATH}/{self.name}_{start_datetime_str}")

        video_folder_path = f"{self.VIDEO_PATH}/{self.name}_{start_datetime_str}"
        os.makedirs(video_folder_path, exist_ok=True)
        self.env.video_folder = video_folder_path

        try:
            while self.timesteps < self.MAX_TIMESTEPS:
                state, _ = self.env.reset()
                done = False

                while not done:
                    self.timesteps += 1

                    action = self.act(state)
                    next_state, reward, terminated, truncated, info = self.env.step(action)
                    done = terminated or truncated

                    # Store `done` (not just `terminated`): the buffer uses
                    # optimize_memory_usage=True, so the next observation of the
                    # last transition of an episode is overwritten by the first
                    # observation of the next episode. Marking truncated episodes
                    # as done prevents bootstrapping from that unrelated state.
                    self.remember(state, next_state, action, reward, done, info)

                    if self.timesteps >= self.LEARNING_STARTS and self.timesteps % self.TRAIN_FREQUENCY == 0:
                        self.optimize_model()

                    state = next_state

                    if done:
                        mean_reward = np.mean(self.env.return_queue)
                        length_reward = np.mean(self.env.length_queue)

                        # Get episode statistics from info ("episode" key only exist when episode is done)
                        episode_reward = info["episode"]["r"]
                        self.writer.add_scalar("rollout/episodic_return", episode_reward, self.timesteps)
                        self.writer.add_scalar("rollout/episodic_length", info["episode"]["l"], self.timesteps)

                        self.writer.add_scalar("rollout/ep_len_mean", length_reward, self.timesteps)
                        self.writer.add_scalar("rollout/ep_rew_mean", mean_reward, self.timesteps)

                        self.writer.add_scalar("rollout/exploration_rate", self.epsilon, self.timesteps)

                        print(f"Episode {self.env.episode_count} finished (timesteps: {self.timesteps}/{self.MAX_TIMESTEPS})\n"
                              f"Epsilon: {self.epsilon:.2f}, Episode reward: {episode_reward.item()}, Mean reward: {mean_reward:.2f}")

                        if self.env.episode_count % self.CHECKPOINT_INTERVAL_EPISODE == 0:
                            self.save_checkpoint()
                        print("***************************")

                    if self.timesteps >= self.LEARNING_STARTS and self.timesteps % self.TARGET_UPDATE_INTERVAL == 0:
                        self.update_target_network()

            self.save_checkpoint()  # Save last checkpoint at the end of training
        finally:
            # Make sure buffered events reach disk even if training is interrupted.
            self.writer.flush()
            self.writer.close()

In [6]:
# Create an agent with the default run name and environment, then start the
# training loop (runs until MAX_TIMESTEPS is reached).
agent = DQNAgent()
agent.run()

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]
  logger.warn(


Episode 1 finished (timesteps: 822/10000000)
Epsilon: 1.00, Episode reward: -21.0, Mean reward: -21.00
***************************
Moviepy - Building video runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-0.mp4.
Moviepy - Writing video runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-0.mp4



                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-0.mp4
Episode 2 finished (timesteps: 1808/10000000)
Epsilon: 1.00, Episode reward: -20.0, Mean reward: -20.50
***************************
Episode 3 finished (timesteps: 2885/10000000)
Epsilon: 1.00, Episode reward: -21.0, Mean reward: -20.67
***************************
Episode 4 finished (timesteps: 3707/10000000)
Epsilon: 1.00, Episode reward: -21.0, Mean reward: -20.75
***************************
Episode 5 finished (timesteps: 4674/10000000)
Epsilon: 1.00, Episode reward: -21.0, Mean reward: -20.80
***************************
Episode 6 finished (timesteps: 5790/10000000)
Epsilon: 1.00, Episode reward: -18.0, Mean reward: -20.33
***************************
Episode 7 finished (timesteps: 6662/10000000)
Epsilon: 1.00, Episode reward: -21.0, Mean reward: -20.43
***************************
Episode 8 finished (timesteps: 7643/10000000)
Epsilon: 1.00, Episode reward: -20.0, Mean reward: -20

Episode 63 finished (timesteps: 58414/10000000)
Epsilon: 1.00, Episode reward: -21.0, Mean reward: -20.32
***************************
Episode 64 finished (timesteps: 59365/10000000)
Epsilon: 1.00, Episode reward: -20.0, Mean reward: -20.31
***************************
Episode 65 finished (timesteps: 60276/10000000)
Epsilon: 1.00, Episode reward: -20.0, Mean reward: -20.31
***************************
Episode 66 finished (timesteps: 61288/10000000)
Epsilon: 1.00, Episode reward: -20.0, Mean reward: -20.30
***************************
Episode 67 finished (timesteps: 62138/10000000)
Epsilon: 1.00, Episode reward: -21.0, Mean reward: -20.31
***************************
Episode 68 finished (timesteps: 62960/10000000)
Epsilon: 1.00, Episode reward: -21.0, Mean reward: -20.32
***************************
Episode 69 finished (timesteps: 63861/10000000)
Epsilon: 1.00, Episode reward: -21.0, Mean reward: -20.33
***************************
Episode 70 finished (timesteps: 64679/10000000)
Epsilon: 1.00,

Episode 124 finished (timesteps: 113731/10000000)
Epsilon: 0.99, Episode reward: -18.0, Mean reward: -20.40
***************************
Episode 125 finished (timesteps: 114613/10000000)
Epsilon: 0.99, Episode reward: -21.0, Mean reward: -20.40
***************************
Episode 126 finished (timesteps: 115495/10000000)
Epsilon: 0.98, Episode reward: -21.0, Mean reward: -20.40
***************************
Episode 127 finished (timesteps: 116452/10000000)
Epsilon: 0.98, Episode reward: -20.0, Mean reward: -20.41
***************************
Episode 128 finished (timesteps: 117602/10000000)
Epsilon: 0.98, Episode reward: -20.0, Mean reward: -20.40
***************************
Episode 129 finished (timesteps: 118629/10000000)
Epsilon: 0.98, Episode reward: -18.0, Mean reward: -20.37
***************************
Episode 130 finished (timesteps: 119466/10000000)
Epsilon: 0.98, Episode reward: -21.0, Mean reward: -20.38
***************************
Episode 131 finished (timesteps: 120253/10000000

Episode 185 finished (timesteps: 171534/10000000)
Epsilon: 0.93, Episode reward: -21.0, Mean reward: -20.37
***************************
Episode 186 finished (timesteps: 172591/10000000)
Epsilon: 0.93, Episode reward: -18.0, Mean reward: -20.34
***************************
Episode 187 finished (timesteps: 173616/10000000)
Epsilon: 0.93, Episode reward: -20.0, Mean reward: -20.33
***************************
Episode 188 finished (timesteps: 174434/10000000)
Epsilon: 0.93, Episode reward: -21.0, Mean reward: -20.34
***************************
Episode 189 finished (timesteps: 175504/10000000)
Epsilon: 0.93, Episode reward: -19.0, Mean reward: -20.32
***************************
Episode 190 finished (timesteps: 176328/10000000)
Epsilon: 0.92, Episode reward: -21.0, Mean reward: -20.32
***************************
Episode 191 finished (timesteps: 177226/10000000)
Epsilon: 0.92, Episode reward: -21.0, Mean reward: -20.32
***************************
Episode 192 finished (timesteps: 178148/10000000

Episode 246 finished (timesteps: 228666/10000000)
Epsilon: 0.87, Episode reward: -18.0, Mean reward: -20.29
***************************
Episode 247 finished (timesteps: 229628/10000000)
Epsilon: 0.87, Episode reward: -19.0, Mean reward: -20.28
***************************
Episode 248 finished (timesteps: 230752/10000000)
Epsilon: 0.87, Episode reward: -20.0, Mean reward: -20.27
***************************
Episode 249 finished (timesteps: 231514/10000000)
Epsilon: 0.87, Episode reward: -21.0, Mean reward: -20.28
***************************
Episode 250 finished (timesteps: 232687/10000000)
Epsilon: 0.87, Episode reward: -20.0, Mean reward: -20.28
***************************
Episode 251 finished (timesteps: 233842/10000000)
Epsilon: 0.87, Episode reward: -18.0, Mean reward: -20.26
***************************
Episode 252 finished (timesteps: 234867/10000000)
Epsilon: 0.87, Episode reward: -21.0, Mean reward: -20.27
***************************
Episode 253 finished (timesteps: 235732/10000000

Episode 307 finished (timesteps: 286767/10000000)
Epsilon: 0.82, Episode reward: -20.0, Mean reward: -20.25
***************************
Episode 308 finished (timesteps: 287634/10000000)
Epsilon: 0.81, Episode reward: -21.0, Mean reward: -20.26
***************************
Episode 309 finished (timesteps: 288671/10000000)
Epsilon: 0.81, Episode reward: -19.0, Mean reward: -20.25
***************************
Episode 310 finished (timesteps: 289548/10000000)
Epsilon: 0.81, Episode reward: -21.0, Mean reward: -20.26
***************************
Episode 311 finished (timesteps: 290384/10000000)
Epsilon: 0.81, Episode reward: -20.0, Mean reward: -20.25
***************************
Episode 312 finished (timesteps: 291420/10000000)
Epsilon: 0.81, Episode reward: -19.0, Mean reward: -20.23
***************************
Episode 313 finished (timesteps: 292402/10000000)
Epsilon: 0.81, Episode reward: -20.0, Mean reward: -20.23
***************************
Episode 314 finished (timesteps: 293540/10000000

Episode 368 finished (timesteps: 342823/10000000)
Epsilon: 0.76, Episode reward: -21.0, Mean reward: -20.28
***************************
Episode 369 finished (timesteps: 343749/10000000)
Epsilon: 0.76, Episode reward: -21.0, Mean reward: -20.29
***************************
Episode 370 finished (timesteps: 344647/10000000)
Epsilon: 0.76, Episode reward: -20.0, Mean reward: -20.28
***************************
Episode 371 finished (timesteps: 345647/10000000)
Epsilon: 0.76, Episode reward: -19.0, Mean reward: -20.28
***************************
Episode 372 finished (timesteps: 346773/10000000)
Epsilon: 0.76, Episode reward: -19.0, Mean reward: -20.29
***************************
Episode 373 finished (timesteps: 347608/10000000)
Epsilon: 0.75, Episode reward: -20.0, Mean reward: -20.29
***************************
Episode 374 finished (timesteps: 348486/10000000)
Epsilon: 0.75, Episode reward: -21.0, Mean reward: -20.30
***************************
Episode 375 finished (timesteps: 349381/10000000

Episode 429 finished (timesteps: 400717/10000000)
Epsilon: 0.70, Episode reward: -21.0, Mean reward: -20.22
***************************
Moviepy - Building video runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-400000.mp4.
Moviepy - Writing video runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-400000.mp4



                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-400000.mp4
Episode 430 finished (timesteps: 401713/10000000)
Epsilon: 0.70, Episode reward: -19.0, Mean reward: -20.20
***************************
Episode 431 finished (timesteps: 402597/10000000)
Epsilon: 0.70, Episode reward: -20.0, Mean reward: -20.20
***************************
Episode 432 finished (timesteps: 403535/10000000)
Epsilon: 0.70, Episode reward: -21.0, Mean reward: -20.20
***************************
Episode 433 finished (timesteps: 404401/10000000)
Epsilon: 0.70, Episode reward: -21.0, Mean reward: -20.20
***************************
Episode 434 finished (timesteps: 405311/10000000)
Epsilon: 0.70, Episode reward: -21.0, Mean reward: -20.21
***************************
Episode 435 finished (timesteps: 406281/10000000)
Epsilon: 0.70, Episode reward: -21.0, Mean reward: -20.22
***************************
Episode 436 finished (timesteps: 407116/10000000)
Epsilon: 0.70, Episod

Episode 490 finished (timesteps: 457285/10000000)
Epsilon: 0.65, Episode reward: -21.0, Mean reward: -20.26
***************************
Episode 491 finished (timesteps: 458215/10000000)
Epsilon: 0.65, Episode reward: -21.0, Mean reward: -20.26
***************************
Episode 492 finished (timesteps: 459158/10000000)
Epsilon: 0.64, Episode reward: -21.0, Mean reward: -20.27
***************************
Episode 493 finished (timesteps: 460192/10000000)
Epsilon: 0.64, Episode reward: -19.0, Mean reward: -20.26
***************************
Episode 494 finished (timesteps: 461075/10000000)
Epsilon: 0.64, Episode reward: -20.0, Mean reward: -20.26
***************************
Episode 495 finished (timesteps: 461985/10000000)
Epsilon: 0.64, Episode reward: -21.0, Mean reward: -20.26
***************************
Episode 496 finished (timesteps: 462941/10000000)
Epsilon: 0.64, Episode reward: -20.0, Mean reward: -20.25
***************************
Episode 497 finished (timesteps: 464184/10000000

Episode 550 finished (timesteps: 513201/10000000)
Epsilon: 0.59, Episode reward: -19.0, Mean reward: -20.22
***************************
Episode 551 finished (timesteps: 513987/10000000)
Epsilon: 0.59, Episode reward: -21.0, Mean reward: -20.22
***************************
Episode 552 finished (timesteps: 514947/10000000)
Epsilon: 0.59, Episode reward: -21.0, Mean reward: -20.22
***************************
Episode 553 finished (timesteps: 515849/10000000)
Epsilon: 0.59, Episode reward: -20.0, Mean reward: -20.21
***************************
Episode 554 finished (timesteps: 516803/10000000)
Epsilon: 0.59, Episode reward: -20.0, Mean reward: -20.22
***************************
Episode 555 finished (timesteps: 517668/10000000)
Epsilon: 0.59, Episode reward: -20.0, Mean reward: -20.23
***************************
Episode 556 finished (timesteps: 518872/10000000)
Epsilon: 0.59, Episode reward: -19.0, Mean reward: -20.24
***************************
Episode 557 finished (timesteps: 519862/10000000

Episode 611 finished (timesteps: 571444/10000000)
Epsilon: 0.53, Episode reward: -21.0, Mean reward: -20.21
***************************
Episode 612 finished (timesteps: 572337/10000000)
Epsilon: 0.53, Episode reward: -21.0, Mean reward: -20.21
***************************
Episode 613 finished (timesteps: 573217/10000000)
Epsilon: 0.53, Episode reward: -21.0, Mean reward: -20.22
***************************
Episode 614 finished (timesteps: 574217/10000000)
Epsilon: 0.53, Episode reward: -20.0, Mean reward: -20.21
***************************
Episode 615 finished (timesteps: 575120/10000000)
Epsilon: 0.53, Episode reward: -21.0, Mean reward: -20.23
***************************
Episode 616 finished (timesteps: 576135/10000000)
Epsilon: 0.53, Episode reward: -20.0, Mean reward: -20.22
***************************
Episode 617 finished (timesteps: 576999/10000000)
Epsilon: 0.53, Episode reward: -20.0, Mean reward: -20.21
***************************
Episode 618 finished (timesteps: 577837/10000000

Episode 672 finished (timesteps: 632983/10000000)
Epsilon: 0.47, Episode reward: -21.0, Mean reward: -20.06
***************************
Episode 673 finished (timesteps: 633866/10000000)
Epsilon: 0.47, Episode reward: -21.0, Mean reward: -20.06
***************************
Episode 674 finished (timesteps: 635258/10000000)
Epsilon: 0.47, Episode reward: -18.0, Mean reward: -20.03
***************************
Episode 675 finished (timesteps: 636105/10000000)
Epsilon: 0.47, Episode reward: -21.0, Mean reward: -20.03
***************************
Episode 676 finished (timesteps: 637491/10000000)
Epsilon: 0.47, Episode reward: -18.0, Mean reward: -20.00
***************************
Episode 677 finished (timesteps: 638731/10000000)
Epsilon: 0.47, Episode reward: -19.0, Mean reward: -20.02
***************************
Episode 678 finished (timesteps: 639539/10000000)
Epsilon: 0.47, Episode reward: -21.0, Mean reward: -20.02
***************************
Episode 679 finished (timesteps: 640643/10000000

Episode 733 finished (timesteps: 702352/10000000)
Epsilon: 0.40, Episode reward: -19.0, Mean reward: -19.75
***************************
Episode 734 finished (timesteps: 703336/10000000)
Epsilon: 0.40, Episode reward: -20.0, Mean reward: -19.76
***************************
Episode 735 finished (timesteps: 704507/10000000)
Epsilon: 0.40, Episode reward: -19.0, Mean reward: -19.74
***************************
Episode 736 finished (timesteps: 705466/10000000)
Epsilon: 0.40, Episode reward: -20.0, Mean reward: -19.75
***************************
Episode 737 finished (timesteps: 706686/10000000)
Epsilon: 0.40, Episode reward: -20.0, Mean reward: -19.75
***************************
Episode 738 finished (timesteps: 708098/10000000)
Epsilon: 0.40, Episode reward: -17.0, Mean reward: -19.71
***************************
Episode 739 finished (timesteps: 709365/10000000)
Epsilon: 0.40, Episode reward: -18.0, Mean reward: -19.68
***************************
Episode 740 finished (timesteps: 710526/10000000

Episode 794 finished (timesteps: 777493/10000000)
Epsilon: 0.33, Episode reward: -17.0, Mean reward: -19.40
***************************
Episode 795 finished (timesteps: 778795/10000000)
Epsilon: 0.33, Episode reward: -20.0, Mean reward: -19.40
***************************
Episode 796 finished (timesteps: 780110/10000000)
Epsilon: 0.33, Episode reward: -19.0, Mean reward: -19.38
***************************
Episode 797 finished (timesteps: 781581/10000000)
Epsilon: 0.33, Episode reward: -17.0, Mean reward: -19.34
***************************
Episode 798 finished (timesteps: 782728/10000000)
Epsilon: 0.32, Episode reward: -18.0, Mean reward: -19.34
***************************
Episode 799 finished (timesteps: 784206/10000000)
Epsilon: 0.32, Episode reward: -17.0, Mean reward: -19.30
***************************
Episode 800 finished (timesteps: 785443/10000000)
Epsilon: 0.32, Episode reward: -20.0, Mean reward: -19.30
***************************
Episode 801 finished (timesteps: 786680/10000000

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-800000.mp4
Episode 812 finished (timesteps: 801923/10000000)
Epsilon: 0.31, Episode reward: -20.0, Mean reward: -19.19
***************************
Episode 813 finished (timesteps: 803172/10000000)
Epsilon: 0.30, Episode reward: -20.0, Mean reward: -19.20
***************************
Episode 814 finished (timesteps: 804452/10000000)
Epsilon: 0.30, Episode reward: -19.0, Mean reward: -19.18
***************************
Episode 815 finished (timesteps: 805489/10000000)
Epsilon: 0.30, Episode reward: -21.0, Mean reward: -19.19
***************************
Episode 816 finished (timesteps: 806732/10000000)
Epsilon: 0.30, Episode reward: -19.0, Mean reward: -19.19
***************************
Episode 817 finished (timesteps: 807929/10000000)
Epsilon: 0.30, Episode reward: -18.0, Mean reward: -19.19
***************************
Episode 818 finished (timesteps: 809016/10000000)
Epsilon: 0.30, Episod

Episode 872 finished (timesteps: 886941/10000000)
Epsilon: 0.22, Episode reward: -17.0, Mean reward: -18.36
***************************
Episode 873 finished (timesteps: 888157/10000000)
Epsilon: 0.22, Episode reward: -20.0, Mean reward: -18.35
***************************
Episode 874 finished (timesteps: 889704/10000000)
Epsilon: 0.22, Episode reward: -15.0, Mean reward: -18.29
***************************
Episode 875 finished (timesteps: 890978/10000000)
Epsilon: 0.22, Episode reward: -19.0, Mean reward: -18.30
***************************
Episode 876 finished (timesteps: 892635/10000000)
Epsilon: 0.22, Episode reward: -19.0, Mean reward: -18.29
***************************
Episode 877 finished (timesteps: 894068/10000000)
Epsilon: 0.21, Episode reward: -17.0, Mean reward: -18.27
***************************
Episode 878 finished (timesteps: 895583/10000000)
Epsilon: 0.21, Episode reward: -16.0, Mean reward: -18.25
***************************
Episode 879 finished (timesteps: 896688/10000000

Episode 933 finished (timesteps: 980314/10000000)
Epsilon: 0.13, Episode reward: -17.0, Mean reward: -17.78
***************************
Episode 934 finished (timesteps: 981831/10000000)
Epsilon: 0.13, Episode reward: -19.0, Mean reward: -17.79
***************************
Episode 935 finished (timesteps: 982922/10000000)
Epsilon: 0.13, Episode reward: -20.0, Mean reward: -17.78
***************************
Episode 936 finished (timesteps: 984621/10000000)
Epsilon: 0.12, Episode reward: -16.0, Mean reward: -17.80
***************************
Episode 937 finished (timesteps: 985949/10000000)
Epsilon: 0.12, Episode reward: -20.0, Mean reward: -17.80
***************************
Episode 938 finished (timesteps: 987261/10000000)
Epsilon: 0.12, Episode reward: -19.0, Mean reward: -17.83
***************************
Episode 939 finished (timesteps: 988186/10000000)
Epsilon: 0.12, Episode reward: -21.0, Mean reward: -17.89
***************************
Episode 940 finished (timesteps: 989616/10000000

Episode 993 finished (timesteps: 1082302/10000000)
Epsilon: 0.03, Episode reward: -17.0, Mean reward: -17.71
***************************
Episode 994 finished (timesteps: 1083392/10000000)
Epsilon: 0.03, Episode reward: -20.0, Mean reward: -17.72
***************************
Episode 995 finished (timesteps: 1085047/10000000)
Epsilon: 0.02, Episode reward: -18.0, Mean reward: -17.73
***************************
Episode 996 finished (timesteps: 1086830/10000000)
Epsilon: 0.02, Episode reward: -17.0, Mean reward: -17.70
***************************
Episode 997 finished (timesteps: 1088564/10000000)
Epsilon: 0.02, Episode reward: -17.0, Mean reward: -17.68
***************************
Episode 998 finished (timesteps: 1090284/10000000)
Epsilon: 0.02, Episode reward: -16.0, Mean reward: -17.67
***************************
Episode 999 finished (timesteps: 1092491/10000000)
Epsilon: 0.02, Episode reward: -15.0, Mean reward: -17.65
***************************
Episode 1000 finished (timesteps: 1096024

Episode 1052 finished (timesteps: 1199679/10000000)
Epsilon: 0.01, Episode reward: -17.0, Mean reward: -16.89
***************************
Moviepy - Building video runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-1200000.mp4.
Moviepy - Writing video runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-1200000.mp4



                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-1200000.mp4
Episode 1053 finished (timesteps: 1201843/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: -16.84
***************************
Episode 1054 finished (timesteps: 1203557/10000000)
Epsilon: 0.01, Episode reward: -17.0, Mean reward: -16.80
***************************
Episode 1055 finished (timesteps: 1205041/10000000)
Epsilon: 0.01, Episode reward: -20.0, Mean reward: -16.86
***************************
Episode 1056 finished (timesteps: 1206843/10000000)
Epsilon: 0.01, Episode reward: -18.0, Mean reward: -16.86
***************************
Episode 1057 finished (timesteps: 1208521/10000000)
Epsilon: 0.01, Episode reward: -13.0, Mean reward: -16.80
***************************
Episode 1058 finished (timesteps: 1210054/10000000)
Epsilon: 0.01, Episode reward: -19.0, Mean reward: -16.81
***************************
Episode 1059 finished (timesteps: 1211535/10000000)
Epsilo

Episode 1112 finished (timesteps: 1315282/10000000)
Epsilon: 0.01, Episode reward: -11.0, Mean reward: -15.01
***************************
Episode 1113 finished (timesteps: 1316918/10000000)
Epsilon: 0.01, Episode reward: -16.0, Mean reward: -15.03
***************************
Episode 1114 finished (timesteps: 1319168/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: -14.96
***************************
Episode 1115 finished (timesteps: 1321017/10000000)
Epsilon: 0.01, Episode reward: -10.0, Mean reward: -14.90
***************************
Episode 1116 finished (timesteps: 1322651/10000000)
Epsilon: 0.01, Episode reward: -18.0, Mean reward: -14.94
***************************
Episode 1117 finished (timesteps: 1324227/10000000)
Epsilon: 0.01, Episode reward: -16.0, Mean reward: -14.95
***************************
Episode 1118 finished (timesteps: 1325950/10000000)
Epsilon: 0.01, Episode reward: -15.0, Mean reward: -14.92
***************************
Episode 1119 finished (timesteps: 

Episode 1172 finished (timesteps: 1440219/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: -13.40
***************************
Episode 1173 finished (timesteps: 1442228/10000000)
Epsilon: 0.01, Episode reward: -15.0, Mean reward: -13.44
***************************
Episode 1174 finished (timesteps: 1445141/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: -13.35
***************************
Episode 1175 finished (timesteps: 1447278/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: -13.36
***************************
Episode 1176 finished (timesteps: 1450118/10000000)
Epsilon: 0.01, Episode reward: -7.0, Mean reward: -13.30
***************************
Episode 1177 finished (timesteps: 1452860/10000000)
Epsilon: 0.01, Episode reward: -9.0, Mean reward: -13.18
***************************
Episode 1178 finished (timesteps: 1455039/10000000)
Epsilon: 0.01, Episode reward: -13.0, Mean reward: -13.20
***************************
Episode 1179 finished (timesteps: 145

Episode 1232 finished (timesteps: 1569752/10000000)
Epsilon: 0.01, Episode reward: -13.0, Mean reward: -12.21
***************************
Episode 1233 finished (timesteps: 1571915/10000000)
Epsilon: 0.01, Episode reward: -11.0, Mean reward: -12.21
***************************
Episode 1234 finished (timesteps: 1573879/10000000)
Epsilon: 0.01, Episode reward: -11.0, Mean reward: -12.18
***************************
Episode 1235 finished (timesteps: 1575844/10000000)
Epsilon: 0.01, Episode reward: -12.0, Mean reward: -12.13
***************************
Episode 1236 finished (timesteps: 1578195/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: -12.02
***************************
Episode 1237 finished (timesteps: 1580205/10000000)
Epsilon: 0.01, Episode reward: -11.0, Mean reward: -12.03
***************************
Episode 1238 finished (timesteps: 1582536/10000000)
Epsilon: 0.01, Episode reward: -6.0, Mean reward: -12.01
***************************
Episode 1239 finished (timesteps: 15

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-1600000.mp4
Episode 1247 finished (timesteps: 1602663/10000000)
Epsilon: 0.01, Episode reward: -6.0, Mean reward: -11.70
***************************
Episode 1248 finished (timesteps: 1604704/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: -11.68
***************************
Episode 1249 finished (timesteps: 1606700/10000000)
Epsilon: 0.01, Episode reward: -10.0, Mean reward: -11.60
***************************
Episode 1250 finished (timesteps: 1608148/10000000)
Epsilon: 0.01, Episode reward: -17.0, Mean reward: -11.60
***************************
Episode 1251 finished (timesteps: 1609496/10000000)
Epsilon: 0.01, Episode reward: -17.0, Mean reward: -11.67
***************************
Episode 1252 finished (timesteps: 1611376/10000000)
Epsilon: 0.01, Episode reward: -13.0, Mean reward: -11.64
***************************
Episode 1253 finished (timesteps: 1613922/10000000)
Epsilon

Episode 1306 finished (timesteps: 1728578/10000000)
Epsilon: 0.01, Episode reward: -6.0, Mean reward: -9.43
***************************
Episode 1307 finished (timesteps: 1730975/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: -9.38
***************************
Episode 1308 finished (timesteps: 1733713/10000000)
Epsilon: 0.01, Episode reward: -10.0, Mean reward: -9.35
***************************
Episode 1309 finished (timesteps: 1735541/10000000)
Epsilon: 0.01, Episode reward: -16.0, Mean reward: -9.37
***************************
Episode 1310 finished (timesteps: 1738277/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: -9.30
***************************
Episode 1311 finished (timesteps: 1740227/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: -9.33
***************************
Episode 1312 finished (timesteps: 1742781/10000000)
Epsilon: 0.01, Episode reward: -7.0, Mean reward: -9.28
***************************
Episode 1313 finished (timesteps: 1744600/100

Episode 1367 finished (timesteps: 1866030/10000000)
Epsilon: 0.01, Episode reward: 15.0, Mean reward: -7.51
***************************
Episode 1368 finished (timesteps: 1867872/10000000)
Epsilon: 0.01, Episode reward: -16.0, Mean reward: -7.55
***************************
Episode 1369 finished (timesteps: 1869952/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: -7.33
***************************
Episode 1370 finished (timesteps: 1872156/10000000)
Epsilon: 0.01, Episode reward: 8.0, Mean reward: -7.19
***************************
Episode 1371 finished (timesteps: 1874258/10000000)
Epsilon: 0.01, Episode reward: -13.0, Mean reward: -7.14
***************************
Episode 1372 finished (timesteps: 1875992/10000000)
Epsilon: 0.01, Episode reward: -13.0, Mean reward: -7.10
***************************
Episode 1373 finished (timesteps: 1878421/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: -7.11
***************************
Episode 1374 finished (timesteps: 1880668/1000

Episode 1428 finished (timesteps: 1993283/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: -6.12
***************************
Episode 1429 finished (timesteps: 1995661/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: -5.87
***************************
Episode 1430 finished (timesteps: 1998044/10000000)
Epsilon: 0.01, Episode reward: -7.0, Mean reward: -5.77
***************************
Episode 1431 finished (timesteps: 2000728/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: -5.77
***************************
Moviepy - Building video runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-2000000.mp4.
Moviepy - Writing video runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-2000000.mp4



                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-2000000.mp4
Episode 1432 finished (timesteps: 2002808/10000000)
Epsilon: 0.01, Episode reward: -12.0, Mean reward: -5.76
***************************
Episode 1433 finished (timesteps: 2005217/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: -5.58
***************************
Episode 1434 finished (timesteps: 2007777/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: -5.49
***************************
Episode 1435 finished (timesteps: 2010046/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: -5.43
***************************
Episode 1436 finished (timesteps: 2012155/10000000)
Epsilon: 0.01, Episode reward: -12.0, Mean reward: -5.42
***************************
Episode 1437 finished (timesteps: 2014639/10000000)
Epsilon: 0.01, Episode reward: 5.0, Mean reward: -5.27
***************************
Episode 1438 finished (timesteps: 2016190/10000000)
Epsilon: 0.01, Epi

Episode 1492 finished (timesteps: 2130232/10000000)
Epsilon: 0.01, Episode reward: -12.0, Mean reward: -6.67
***************************
Episode 1493 finished (timesteps: 2133117/10000000)
Epsilon: 0.01, Episode reward: 1.0, Mean reward: -6.53
***************************
Episode 1494 finished (timesteps: 2136111/10000000)
Epsilon: 0.01, Episode reward: 1.0, Mean reward: -6.66
***************************
Episode 1495 finished (timesteps: 2138162/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: -6.62
***************************
Episode 1496 finished (timesteps: 2139924/10000000)
Epsilon: 0.01, Episode reward: -16.0, Mean reward: -6.64
***************************
Episode 1497 finished (timesteps: 2141956/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: -6.38
***************************
Episode 1498 finished (timesteps: 2144496/10000000)
Epsilon: 0.01, Episode reward: -7.0, Mean reward: -6.34
***************************
Episode 1499 finished (timesteps: 2146744/10000

Episode 1552 finished (timesteps: 2260820/10000000)
Epsilon: 0.01, Episode reward: -12.0, Mean reward: -7.34
***************************
Episode 1553 finished (timesteps: 2263280/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: -7.13
***************************
Episode 1554 finished (timesteps: 2265271/10000000)
Epsilon: 0.01, Episode reward: -12.0, Mean reward: -7.38
***************************
Episode 1555 finished (timesteps: 2267165/10000000)
Epsilon: 0.01, Episode reward: -12.0, Mean reward: -7.40
***************************
Episode 1556 finished (timesteps: 2268933/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: -7.40
***************************
Episode 1557 finished (timesteps: 2271533/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: -7.19
***************************
Episode 1558 finished (timesteps: 2273878/10000000)
Epsilon: 0.01, Episode reward: -7.0, Mean reward: -7.15
***************************
Episode 1559 finished (timesteps: 2276190/1000

Episode 1613 finished (timesteps: 2396336/10000000)
Epsilon: 0.01, Episode reward: -11.0, Mean reward: -5.95
***************************
Episode 1614 finished (timesteps: 2398235/10000000)
Epsilon: 0.01, Episode reward: -13.0, Mean reward: -5.97
***************************
Episode 1615 finished (timesteps: 2400391/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: -5.69
***************************
Moviepy - Building video runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-2400000.mp4.
Moviepy - Writing video runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-2400000.mp4



                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-2400000.mp4
Episode 1616 finished (timesteps: 2402610/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: -5.51
***************************
Episode 1617 finished (timesteps: 2404787/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: -5.29
***************************
Episode 1618 finished (timesteps: 2407243/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: -5.24
***************************
Episode 1619 finished (timesteps: 2409670/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: -5.11
***************************
Episode 1620 finished (timesteps: 2411733/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: -5.14
***************************
Episode 1621 finished (timesteps: 2414755/10000000)
Epsilon: 0.01, Episode reward: 1.0, Mean reward: -5.17
***************************
Episode 1622 finished (timesteps: 2416797/10000000)
Epsilon: 0.01, Episo

Episode 1676 finished (timesteps: 2544298/10000000)
Epsilon: 0.01, Episode reward: -9.0, Mean reward: -6.12
***************************
Episode 1677 finished (timesteps: 2546763/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: -6.06
***************************
Episode 1678 finished (timesteps: 2549154/10000000)
Epsilon: 0.01, Episode reward: -12.0, Mean reward: -6.07
***************************
Episode 1679 finished (timesteps: 2552386/10000000)
Epsilon: 0.01, Episode reward: -3.0, Mean reward: -6.01
***************************
Episode 1680 finished (timesteps: 2554428/10000000)
Epsilon: 0.01, Episode reward: -11.0, Mean reward: -5.98
***************************
Episode 1681 finished (timesteps: 2558329/10000000)
Epsilon: 0.01, Episode reward: 1.0, Mean reward: -5.84
***************************
Episode 1682 finished (timesteps: 2561086/10000000)
Epsilon: 0.01, Episode reward: -2.0, Mean reward: -5.73
***************************
Episode 1683 finished (timesteps: 2563107/10000

Episode 1737 finished (timesteps: 2692165/10000000)
Epsilon: 0.01, Episode reward: -9.0, Mean reward: -4.24
***************************
Episode 1738 finished (timesteps: 2694432/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: -4.23
***************************
Episode 1739 finished (timesteps: 2696424/10000000)
Epsilon: 0.01, Episode reward: -13.0, Mean reward: -4.27
***************************
Episode 1740 finished (timesteps: 2698979/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: -4.09
***************************
Episode 1741 finished (timesteps: 2701720/10000000)
Epsilon: 0.01, Episode reward: -1.0, Mean reward: -4.02
***************************
Episode 1742 finished (timesteps: 2703944/10000000)
Epsilon: 0.01, Episode reward: -11.0, Mean reward: -4.04
***************************
Episode 1743 finished (timesteps: 2706750/10000000)
Epsilon: 0.01, Episode reward: -3.0, Mean reward: -3.93
***************************
Episode 1744 finished (timesteps: 2708725/10000

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-2800000.mp4
Episode 1790 finished (timesteps: 2802779/10000000)
Epsilon: 0.01, Episode reward: -1.0, Mean reward: -4.30
***************************
Episode 1791 finished (timesteps: 2805267/10000000)
Epsilon: 0.01, Episode reward: -12.0, Mean reward: -4.31
***************************
Episode 1792 finished (timesteps: 2807308/10000000)
Epsilon: 0.01, Episode reward: -12.0, Mean reward: -4.33
***************************
Episode 1793 finished (timesteps: 2809163/10000000)
Epsilon: 0.01, Episode reward: -13.0, Mean reward: -4.55
***************************
Episode 1794 finished (timesteps: 2811579/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: -4.75
***************************
Episode 1795 finished (timesteps: 2814408/10000000)
Epsilon: 0.01, Episode reward: -9.0, Mean reward: -4.73
***************************
Episode 1796 finished (timesteps: 2816459/10000000)
Epsilon: 0.01, 

Episode 1850 finished (timesteps: 2941218/10000000)
Epsilon: 0.01, Episode reward: -7.0, Mean reward: -6.86
***************************
Episode 1851 finished (timesteps: 2942835/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: -6.91
***************************
Episode 1852 finished (timesteps: 2945759/10000000)
Epsilon: 0.01, Episode reward: 2.0, Mean reward: -6.82
***************************
Episode 1853 finished (timesteps: 2947794/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: -6.54
***************************
Episode 1854 finished (timesteps: 2949985/10000000)
Epsilon: 0.01, Episode reward: -11.0, Mean reward: -6.69
***************************
Episode 1855 finished (timesteps: 2952468/10000000)
Epsilon: 0.01, Episode reward: -7.0, Mean reward: -6.64
***************************
Episode 1856 finished (timesteps: 2955123/10000000)
Epsilon: 0.01, Episode reward: -7.0, Mean reward: -6.64
***************************
Episode 1857 finished (timesteps: 2957427/10000

Episode 1911 finished (timesteps: 3072808/10000000)
Epsilon: 0.01, Episode reward: 3.0, Mean reward: -6.33
***************************
Episode 1912 finished (timesteps: 3075161/10000000)
Epsilon: 0.01, Episode reward: -7.0, Mean reward: -6.36
***************************
Episode 1913 finished (timesteps: 3078190/10000000)
Epsilon: 0.01, Episode reward: 2.0, Mean reward: -6.33
***************************
Episode 1914 finished (timesteps: 3080669/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: -6.15
***************************
Episode 1915 finished (timesteps: 3083195/10000000)
Epsilon: 0.01, Episode reward: 7.0, Mean reward: -6.07
***************************
Episode 1916 finished (timesteps: 3085604/10000000)
Epsilon: 0.01, Episode reward: 3.0, Mean reward: -6.02
***************************
Episode 1917 finished (timesteps: 3086588/10000000)
Epsilon: 0.01, Episode reward: -20.0, Mean reward: -6.10
***************************
Episode 1918 finished (timesteps: 3087824/10000000)


                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-3200000.mp4
Episode 1970 finished (timesteps: 3201358/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: -5.58
***************************
Episode 1971 finished (timesteps: 3203399/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: -5.45
***************************
Episode 1972 finished (timesteps: 3206040/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: -5.41
***************************
Episode 1973 finished (timesteps: 3208393/10000000)
Epsilon: 0.01, Episode reward: -9.0, Mean reward: -5.38
***************************
Episode 1974 finished (timesteps: 3210671/10000000)
Epsilon: 0.01, Episode reward: 7.0, Mean reward: -5.43
***************************
Episode 1975 finished (timesteps: 3213814/10000000)
Epsilon: 0.01, Episode reward: 2.0, Mean reward: -5.29
***************************
Episode 1976 finished (timesteps: 3216072/10000000)
Epsilon: 0.01, Episo

Episode 2029 finished (timesteps: 3353614/10000000)
Epsilon: 0.01, Episode reward: -11.0, Mean reward: -3.09
***************************
Episode 2030 finished (timesteps: 3356694/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: -2.85
***************************
Episode 2031 finished (timesteps: 3359763/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: -2.95
***************************
Episode 2032 finished (timesteps: 3361773/10000000)
Epsilon: 0.01, Episode reward: 16.0, Mean reward: -2.71
***************************
Episode 2033 finished (timesteps: 3364693/10000000)
Epsilon: 0.01, Episode reward: 2.0, Mean reward: -2.60
***************************
Episode 2034 finished (timesteps: 3366570/10000000)
Epsilon: 0.01, Episode reward: -13.0, Mean reward: -2.69
***************************
Episode 2035 finished (timesteps: 3368723/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: -2.67
***************************
Episode 2036 finished (timesteps: 3371172/100000

Episode 2090 finished (timesteps: 3527292/10000000)
Epsilon: 0.01, Episode reward: -12.0, Mean reward: -1.05
***************************
Episode 2091 finished (timesteps: 3530313/10000000)
Epsilon: 0.01, Episode reward: 5.0, Mean reward: -1.06
***************************
Episode 2092 finished (timesteps: 3534308/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: -1.06
***************************
Episode 2093 finished (timesteps: 3536503/10000000)
Epsilon: 0.01, Episode reward: 17.0, Mean reward: -0.87
***************************
Episode 2094 finished (timesteps: 3538845/10000000)
Epsilon: 0.01, Episode reward: -11.0, Mean reward: -1.13
***************************
Episode 2095 finished (timesteps: 3542173/10000000)
Epsilon: 0.01, Episode reward: -3.0, Mean reward: -1.11
***************************
Episode 2096 finished (timesteps: 3545193/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: -1.17
***************************
Episode 2097 finished (timesteps: 3547608/100000

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-3600000.mp4




Episode 2117 finished (timesteps: 3601548/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: -1.65
***************************
Episode 2118 finished (timesteps: 3605338/10000000)
Epsilon: 0.01, Episode reward: -3.0, Mean reward: -1.56
***************************
Episode 2119 finished (timesteps: 3609633/10000000)
Epsilon: 0.01, Episode reward: 3.0, Mean reward: -1.33
***************************
Episode 2120 finished (timesteps: 3611965/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: -1.36
***************************
Episode 2121 finished (timesteps: 3617362/10000000)
Epsilon: 0.01, Episode reward: -4.0, Mean reward: -1.42
***************************
Episode 2122 finished (timesteps: 3620467/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: -1.15
***************************
Episode 2123 finished (timesteps: 3624019/10000000)
Epsilon: 0.01, Episode reward: -2.0, Mean reward: -1.26
***************************
Episode 2124 finished (timesteps: 3627575/1000000

Episode 2178 finished (timesteps: 3782618/10000000)
Epsilon: 0.01, Episode reward: -3.0, Mean reward: -0.98
***************************
Episode 2179 finished (timesteps: 3784894/10000000)
Epsilon: 0.01, Episode reward: -10.0, Mean reward: -1.16
***************************
Episode 2180 finished (timesteps: 3787809/10000000)
Epsilon: 0.01, Episode reward: -3.0, Mean reward: -1.14
***************************
Episode 2181 finished (timesteps: 3789940/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: -0.99
***************************
Episode 2182 finished (timesteps: 3792774/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: -1.01
***************************
Episode 2183 finished (timesteps: 3794052/10000000)
Epsilon: 0.01, Episode reward: -17.0, Mean reward: -1.16
***************************
Episode 2184 finished (timesteps: 3796788/10000000)
Epsilon: 0.01, Episode reward: 8.0, Mean reward: -0.87
***************************
Episode 2185 finished (timesteps: 3799642/10000

Episode 2239 finished (timesteps: 3959882/10000000)
Epsilon: 0.01, Episode reward: -2.0, Mean reward: 0.84
***************************
Episode 2240 finished (timesteps: 3962310/10000000)
Epsilon: 0.01, Episode reward: -10.0, Mean reward: 0.79
***************************
Episode 2241 finished (timesteps: 3964864/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 0.92
***************************
Episode 2242 finished (timesteps: 3968207/10000000)
Epsilon: 0.01, Episode reward: 2.0, Mean reward: 1.05
***************************
Episode 2243 finished (timesteps: 3971892/10000000)
Epsilon: 0.01, Episode reward: -3.0, Mean reward: 1.03
***************************
Episode 2244 finished (timesteps: 3975585/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 1.03
***************************
Episode 2245 finished (timesteps: 3977929/10000000)
Epsilon: 0.01, Episode reward: 16.0, Mean reward: 1.23
***************************
Episode 2246 finished (timesteps: 3981563/10000000)
Epsil

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-4000000.mp4
Episode 2254 finished (timesteps: 4002198/10000000)
Epsilon: 0.01, Episode reward: 5.0, Mean reward: 1.33
***************************
Episode 2255 finished (timesteps: 4005013/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: 1.35
***************************
Episode 2256 finished (timesteps: 4008220/10000000)
Epsilon: 0.01, Episode reward: 1.0, Mean reward: 1.47
***************************
Episode 2257 finished (timesteps: 4010864/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: 1.31
***************************
Episode 2258 finished (timesteps: 4014817/10000000)
Epsilon: 0.01, Episode reward: 2.0, Mean reward: 1.19
***************************
Episode 2259 finished (timesteps: 4018367/10000000)
Epsilon: 0.01, Episode reward: -4.0, Mean reward: 1.06
***************************
Episode 2260 finished (timesteps: 4022650/10000000)
Epsilon: 0.01, Episode rewa

Episode 2314 finished (timesteps: 4182573/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 1.62
***************************
Episode 2315 finished (timesteps: 4185656/10000000)
Epsilon: 0.01, Episode reward: -11.0, Mean reward: 1.42
***************************
Episode 2316 finished (timesteps: 4189492/10000000)
Epsilon: 0.01, Episode reward: 2.0, Mean reward: 1.28
***************************
Episode 2317 finished (timesteps: 4192183/10000000)
Epsilon: 0.01, Episode reward: 8.0, Mean reward: 1.41
***************************
Episode 2318 finished (timesteps: 4195397/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 1.49
***************************
Episode 2319 finished (timesteps: 4198433/10000000)
Epsilon: 0.01, Episode reward: -2.0, Mean reward: 1.61
***************************
Episode 2320 finished (timesteps: 4201408/10000000)
Epsilon: 0.01, Episode reward: -4.0, Mean reward: 1.62
***************************
Episode 2321 finished (timesteps: 4204595/10000000)
Epsil

Episode 2375 finished (timesteps: 4347930/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 0.91
***************************
Episode 2376 finished (timesteps: 4351334/10000000)
Epsilon: 0.01, Episode reward: 8.0, Mean reward: 0.97
***************************
Episode 2377 finished (timesteps: 4353910/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: 0.92
***************************
Episode 2378 finished (timesteps: 4356503/10000000)
Epsilon: 0.01, Episode reward: 8.0, Mean reward: 1.06
***************************
Episode 2379 finished (timesteps: 4359617/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 1.23
***************************
Episode 2380 finished (timesteps: 4362022/10000000)
Epsilon: 0.01, Episode reward: -11.0, Mean reward: 1.03
***************************
Episode 2381 finished (timesteps: 4365934/10000000)
Epsilon: 0.01, Episode reward: 2.0, Mean reward: 0.94
***************************
Episode 2382 finished (timesteps: 4369750/10000000)
Epsilo

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-4400000.mp4
Episode 2392 finished (timesteps: 4402553/10000000)
Epsilon: 0.01, Episode reward: -10.0, Mean reward: 1.07
***************************
Episode 2393 finished (timesteps: 4407416/10000000)
Epsilon: 0.01, Episode reward: 2.0, Mean reward: 1.21
***************************
Episode 2394 finished (timesteps: 4411682/10000000)
Epsilon: 0.01, Episode reward: -3.0, Mean reward: 1.22
***************************
Episode 2395 finished (timesteps: 4415510/10000000)
Epsilon: 0.01, Episode reward: -4.0, Mean reward: 1.32
***************************
Episode 2396 finished (timesteps: 4419286/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 1.42
***************************
Episode 2397 finished (timesteps: 4423343/10000000)
Epsilon: 0.01, Episode reward: 7.0, Mean reward: 1.63
***************************
Episode 2398 finished (timesteps: 4427862/10000000)
Epsilon: 0.01, Episode rew

Episode 2452 finished (timesteps: 4604630/10000000)
Epsilon: 0.01, Episode reward: 5.0, Mean reward: 2.76
***************************
Episode 2453 finished (timesteps: 4609706/10000000)
Epsilon: 0.01, Episode reward: 1.0, Mean reward: 2.87
***************************
Episode 2454 finished (timesteps: 4613335/10000000)
Epsilon: 0.01, Episode reward: 2.0, Mean reward: 2.77
***************************
Episode 2455 finished (timesteps: 4616502/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: 2.55
***************************
Episode 2456 finished (timesteps: 4620316/10000000)
Epsilon: 0.01, Episode reward: -8.0, Mean reward: 2.43
***************************
Episode 2457 finished (timesteps: 4623810/10000000)
Epsilon: 0.01, Episode reward: 8.0, Mean reward: 2.49
***************************
Episode 2458 finished (timesteps: 4627726/10000000)
Epsilon: 0.01, Episode reward: -9.0, Mean reward: 2.28
***************************
Episode 2459 finished (timesteps: 4631034/10000000)
Epsilon

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-4800000.mp4
Episode 2510 finished (timesteps: 4803446/10000000)
Epsilon: 0.01, Episode reward: -7.0, Mean reward: 2.71
***************************
Episode 2511 finished (timesteps: 4806340/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 2.74
***************************
Episode 2512 finished (timesteps: 4809609/10000000)
Epsilon: 0.01, Episode reward: 3.0, Mean reward: 2.73
***************************
Episode 2513 finished (timesteps: 4813019/10000000)
Epsilon: 0.01, Episode reward: -4.0, Mean reward: 2.76
***************************
Episode 2514 finished (timesteps: 4815954/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 2.68
***************************
Episode 2515 finished (timesteps: 4819038/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 2.71
***************************
Episode 2516 finished (timesteps: 4823440/10000000)
Epsilon: 0.01, Episode rew

Episode 2570 finished (timesteps: 4991398/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 2.78
***************************
Episode 2571 finished (timesteps: 4994426/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: 2.86
***************************
Episode 2572 finished (timesteps: 4998955/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 2.87
***************************
Episode 2573 finished (timesteps: 5003222/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 2.80
***************************
Episode 2574 finished (timesteps: 5006534/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 2.87
***************************
Episode 2575 finished (timesteps: 5009761/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 2.80
***************************
Episode 2576 finished (timesteps: 5013237/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 2.85
***************************
Episode 2577 finished (timesteps: 5016448/10000000)
Epsilo

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-5200000.mp4
Episode 2626 finished (timesteps: 5201730/10000000)
Epsilon: 0.01, Episode reward: -2.0, Mean reward: 3.46
***************************
Episode 2627 finished (timesteps: 5205295/10000000)
Epsilon: 0.01, Episode reward: 7.0, Mean reward: 3.65
***************************
Episode 2628 finished (timesteps: 5209171/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 3.84
***************************
Episode 2629 finished (timesteps: 5212686/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 3.76
***************************
Episode 2630 finished (timesteps: 5216879/10000000)
Epsilon: 0.01, Episode reward: -10.0, Mean reward: 3.65
***************************
Episode 2631 finished (timesteps: 5220606/10000000)
Epsilon: 0.01, Episode reward: 7.0, Mean reward: 3.73
***************************
Episode 2632 finished (timesteps: 5224735/10000000)
Epsilon: 0.01, Episode rewa

Episode 2687 finished (timesteps: 5411991/10000000)
Epsilon: 0.01, Episode reward: 2.0, Mean reward: 6.08
***************************
Episode 2688 finished (timesteps: 5415976/10000000)
Epsilon: 0.01, Episode reward: 1.0, Mean reward: 6.04
***************************
Episode 2689 finished (timesteps: 5420274/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 5.90
***************************
Episode 2690 finished (timesteps: 5425547/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 6.03
***************************
Episode 2691 finished (timesteps: 5428829/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 6.13
***************************
Episode 2692 finished (timesteps: 5432201/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: 6.21
***************************
Episode 2693 finished (timesteps: 5435436/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 6.28
***************************
Episode 2694 finished (timesteps: 5439407/10000000)
Epsilon

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-5600000.mp4
Episode 2742 finished (timesteps: 5603172/10000000)
Epsilon: 0.01, Episode reward: 16.0, Mean reward: 6.34
***************************
Episode 2743 finished (timesteps: 5606642/10000000)
Epsilon: 0.01, Episode reward: -7.0, Mean reward: 6.17
***************************
Episode 2744 finished (timesteps: 5609619/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 6.18
***************************
Episode 2745 finished (timesteps: 5613039/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 6.18
***************************
Episode 2746 finished (timesteps: 5616009/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: 6.29
***************************
Episode 2747 finished (timesteps: 5618377/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 6.47
***************************
Episode 2748 finished (timesteps: 5622347/10000000)
Epsilon: 0.01, Episode r

Episode 2802 finished (timesteps: 5801496/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: 7.33
***************************
Episode 2803 finished (timesteps: 5805805/10000000)
Epsilon: 0.01, Episode reward: -1.0, Mean reward: 7.39
***************************
Episode 2804 finished (timesteps: 5808575/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 7.61
***************************
Episode 2805 finished (timesteps: 5812224/10000000)
Epsilon: 0.01, Episode reward: 5.0, Mean reward: 7.57
***************************
Episode 2806 finished (timesteps: 5813484/10000000)
Epsilon: 0.01, Episode reward: -20.0, Mean reward: 7.32
***************************
Episode 2807 finished (timesteps: 5816104/10000000)
Epsilon: 0.01, Episode reward: -10.0, Mean reward: 7.15
***************************
Episode 2808 finished (timesteps: 5820086/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: 7.19
***************************
Episode 2809 finished (timesteps: 5823280/10000000)
Ep

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-6000000.mp4
Episode 2859 finished (timesteps: 6002630/10000000)
Epsilon: 0.01, Episode reward: 15.0, Mean reward: 6.42
***************************
Episode 2860 finished (timesteps: 6006062/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 6.46
***************************
Episode 2861 finished (timesteps: 6011190/10000000)
Epsilon: 0.01, Episode reward: 1.0, Mean reward: 6.30
***************************
Episode 2862 finished (timesteps: 6014718/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 6.28
***************************
Episode 2863 finished (timesteps: 6018471/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 6.34
***************************
Episode 2864 finished (timesteps: 6023345/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 6.48
***************************
Episode 2865 finished (timesteps: 6028672/10000000)
Epsilon: 0.01, Episode rew

Episode 2919 finished (timesteps: 6211690/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 7.73
***************************
Episode 2920 finished (timesteps: 6214313/10000000)
Epsilon: 0.01, Episode reward: 15.0, Mean reward: 7.94
***************************
Episode 2921 finished (timesteps: 6217061/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 8.18
***************************
Episode 2922 finished (timesteps: 6220445/10000000)
Epsilon: 0.01, Episode reward: 8.0, Mean reward: 8.12
***************************
Episode 2923 finished (timesteps: 6223469/10000000)
Epsilon: 0.01, Episode reward: -10.0, Mean reward: 7.99
***************************
Episode 2924 finished (timesteps: 6227147/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 7.90
***************************
Episode 2925 finished (timesteps: 6230386/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 7.90
***************************
Episode 2926 finished (timesteps: 6233674/10000000)
Epsi

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-6400000.mp4
Episode 2972 finished (timesteps: 6402230/10000000)
Epsilon: 0.01, Episode reward: -12.0, Mean reward: 7.49
***************************
Episode 2973 finished (timesteps: 6403397/10000000)
Epsilon: 0.01, Episode reward: -18.0, Mean reward: 7.24
***************************
Episode 2974 finished (timesteps: 6405975/10000000)
Epsilon: 0.01, Episode reward: -9.0, Mean reward: 7.05
***************************
Episode 2975 finished (timesteps: 6410217/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 7.02
***************************
Episode 2976 finished (timesteps: 6413199/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 6.94
***************************
Episode 2977 finished (timesteps: 6416106/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 6.94
***************************
Episode 2978 finished (timesteps: 6419363/10000000)
Epsilon: 0.01, Episode r

Episode 3032 finished (timesteps: 6617500/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 5.55
***************************
Episode 3033 finished (timesteps: 6619616/10000000)
Epsilon: 0.01, Episode reward: 21.0, Mean reward: 5.78
***************************
Episode 3034 finished (timesteps: 6623416/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 5.91
***************************
Episode 3035 finished (timesteps: 6627709/10000000)
Epsilon: 0.01, Episode reward: 5.0, Mean reward: 5.87
***************************
Episode 3036 finished (timesteps: 6632274/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 5.91
***************************
Episode 3037 finished (timesteps: 6636016/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: 5.87
***************************
Episode 3038 finished (timesteps: 6638602/10000000)
Epsilon: 0.01, Episode reward: 18.0, Mean reward: 5.91
***************************
Episode 3039 finished (timesteps: 6641234/10000000)
Epsilon

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-6800000.mp4
Episode 3089 finished (timesteps: 6803890/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: 7.92
***************************
Episode 3090 finished (timesteps: 6807239/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: 8.09
***************************
Episode 3091 finished (timesteps: 6810547/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: 8.17
***************************
Episode 3092 finished (timesteps: 6813987/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: 8.20
***************************
Episode 3093 finished (timesteps: 6818548/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 8.28
***************************
Episode 3094 finished (timesteps: 6821586/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 8.32
***************************
Episode 3095 finished (timesteps: 6825071/10000000)
Epsilon: 0.01, Episode re

Episode 3149 finished (timesteps: 7009313/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 7.76
***************************
Episode 3150 finished (timesteps: 7012783/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 7.77
***************************
Episode 3151 finished (timesteps: 7016508/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 7.73
***************************
Episode 3152 finished (timesteps: 7019252/10000000)
Epsilon: 0.01, Episode reward: 15.0, Mean reward: 7.81
***************************
Episode 3153 finished (timesteps: 7021416/10000000)
Epsilon: 0.01, Episode reward: 17.0, Mean reward: 7.87
***************************
Episode 3154 finished (timesteps: 7024814/10000000)
Epsilon: 0.01, Episode reward: 3.0, Mean reward: 7.74
***************************
Episode 3155 finished (timesteps: 7028713/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: 7.75
***************************
Episode 3156 finished (timesteps: 7033344/10000000)
Epsilo

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-7200000.mp4
Episode 3202 finished (timesteps: 7202462/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 8.69
***************************
Episode 3203 finished (timesteps: 7205239/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 8.74
***************************
Episode 3204 finished (timesteps: 7208338/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 8.72
***************************
Episode 3205 finished (timesteps: 7211947/10000000)
Epsilon: 0.01, Episode reward: -2.0, Mean reward: 8.61
***************************
Episode 3206 finished (timesteps: 7215287/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: 8.40
***************************
Episode 3207 finished (timesteps: 7219521/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 8.36
***************************
Episode 3208 finished (timesteps: 7223380/10000000)
Epsilon: 0.01, Episode rewa

Episode 3263 finished (timesteps: 7418835/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 7.56
***************************
Episode 3264 finished (timesteps: 7423018/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 7.52
***************************
Episode 3265 finished (timesteps: 7428092/10000000)
Epsilon: 0.01, Episode reward: -4.0, Mean reward: 7.35
***************************
Episode 3266 finished (timesteps: 7432763/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 7.34
***************************
Episode 3267 finished (timesteps: 7438410/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 7.26
***************************
Episode 3268 finished (timesteps: 7441501/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: 7.26
***************************
Episode 3269 finished (timesteps: 7446554/10000000)
Epsilon: 0.01, Episode reward: 3.0, Mean reward: 7.17
***************************
Episode 3270 finished (timesteps: 7450287/10000000)
Epsilon

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-7600000.mp4
Episode 3314 finished (timesteps: 7604474/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 7.33
***************************
Episode 3315 finished (timesteps: 7608560/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 7.23
***************************
Episode 3316 finished (timesteps: 7612029/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 7.44
***************************
Episode 3317 finished (timesteps: 7616282/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 7.56
***************************
Episode 3318 finished (timesteps: 7620967/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 7.50
***************************
Episode 3319 finished (timesteps: 7624586/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: 7.59
***************************
Episode 3320 finished (timesteps: 7627951/10000000)
Epsilon: 0.01, Episode rew

Episode 3374 finished (timesteps: 7818401/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 8.85
***************************
Episode 3375 finished (timesteps: 7821688/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 8.87
***************************
Episode 3376 finished (timesteps: 7825358/10000000)
Epsilon: 0.01, Episode reward: -6.0, Mean reward: 8.65
***************************
Episode 3377 finished (timesteps: 7826782/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: 8.45
***************************
Episode 3378 finished (timesteps: 7830102/10000000)
Epsilon: 0.01, Episode reward: 16.0, Mean reward: 8.50
***************************
Episode 3379 finished (timesteps: 7834205/10000000)
Epsilon: 0.01, Episode reward: -14.0, Mean reward: 8.28
***************************
Episode 3380 finished (timesteps: 7838096/10000000)
Epsilon: 0.01, Episode reward: 7.0, Mean reward: 8.20
***************************
Episode 3381 finished (timesteps: 7844849/10000000)
Eps

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-8000000.mp4
Episode 3424 finished (timesteps: 8001117/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 8.71
***************************
Episode 3425 finished (timesteps: 8005234/10000000)
Epsilon: 0.01, Episode reward: -5.0, Mean reward: 8.51
***************************
Episode 3426 finished (timesteps: 8009428/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 8.53
***************************
Episode 3427 finished (timesteps: 8013085/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 8.60
***************************
Episode 3428 finished (timesteps: 8015573/10000000)
Epsilon: 0.01, Episode reward: 17.0, Mean reward: 8.59
***************************
Episode 3429 finished (timesteps: 8019495/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: 8.76
***************************
Episode 3430 finished (timesteps: 8022839/10000000)
Epsilon: 0.01, Episode re

Episode 3484 finished (timesteps: 8199529/10000000)
Epsilon: 0.01, Episode reward: 16.0, Mean reward: 9.53
***************************
Episode 3485 finished (timesteps: 8201854/10000000)
Epsilon: 0.01, Episode reward: 16.0, Mean reward: 9.57
***************************
Episode 3486 finished (timesteps: 8206372/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 9.59
***************************
Episode 3487 finished (timesteps: 8209690/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 9.58
***************************
Episode 3488 finished (timesteps: 8211723/10000000)
Epsilon: 0.01, Episode reward: 15.0, Mean reward: 9.59
***************************
Episode 3489 finished (timesteps: 8216293/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 9.55
***************************
Episode 3490 finished (timesteps: 8219254/10000000)
Epsilon: 0.01, Episode reward: 15.0, Mean reward: 9.51
***************************
Episode 3491 finished (timesteps: 8221991/10000000)
Epsil

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-8400000.mp4
Episode 3543 finished (timesteps: 8403624/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 9.39
***************************
Episode 3544 finished (timesteps: 8406698/10000000)
Epsilon: 0.01, Episode reward: 15.0, Mean reward: 9.40
***************************
Episode 3545 finished (timesteps: 8410899/10000000)
Epsilon: 0.01, Episode reward: 15.0, Mean reward: 9.46
***************************
Episode 3546 finished (timesteps: 8413497/10000000)
Epsilon: 0.01, Episode reward: 17.0, Mean reward: 9.55
***************************
Episode 3547 finished (timesteps: 8416931/10000000)
Epsilon: 0.01, Episode reward: 7.0, Mean reward: 9.57
***************************
Episode 3548 finished (timesteps: 8419931/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 9.65
***************************
Episode 3549 finished (timesteps: 8423284/10000000)
Epsilon: 0.01, Episode re

Episode 3603 finished (timesteps: 8610611/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 10.01
***************************
Episode 3604 finished (timesteps: 8612802/10000000)
Epsilon: 0.01, Episode reward: 19.0, Mean reward: 10.04
***************************
Episode 3605 finished (timesteps: 8615252/10000000)
Epsilon: 0.01, Episode reward: 17.0, Mean reward: 10.10
***************************
Episode 3606 finished (timesteps: 8617831/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 10.29
***************************
Episode 3607 finished (timesteps: 8622207/10000000)
Epsilon: 0.01, Episode reward: 3.0, Mean reward: 10.15
***************************
Episode 3608 finished (timesteps: 8627024/10000000)
Epsilon: 0.01, Episode reward: 6.0, Mean reward: 10.29
***************************
Episode 3609 finished (timesteps: 8630780/10000000)
Epsilon: 0.01, Episode reward: 9.0, Mean reward: 10.19
***************************
Episode 3610 finished (timesteps: 8632412/10000000)

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-8800000.mp4
Episode 3660 finished (timesteps: 8804123/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 9.04
***************************
Episode 3661 finished (timesteps: 8807261/10000000)
Epsilon: 0.01, Episode reward: 15.0, Mean reward: 9.12
***************************
Episode 3662 finished (timesteps: 8811227/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 9.10
***************************
Episode 3663 finished (timesteps: 8813980/10000000)
Epsilon: 0.01, Episode reward: 16.0, Mean reward: 9.16
***************************
Episode 3664 finished (timesteps: 8817348/10000000)
Epsilon: 0.01, Episode reward: 7.0, Mean reward: 9.15
***************************
Episode 3665 finished (timesteps: 8820281/10000000)
Epsilon: 0.01, Episode reward: 20.0, Mean reward: 9.19
***************************
Episode 3666 finished (timesteps: 8823352/10000000)
Epsilon: 0.01, Episode rew

Episode 3720 finished (timesteps: 9010405/10000000)
Epsilon: 0.01, Episode reward: 12.0, Mean reward: 8.59
***************************
Episode 3721 finished (timesteps: 9013680/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 8.59
***************************
Episode 3722 finished (timesteps: 9018070/10000000)
Epsilon: 0.01, Episode reward: 15.0, Mean reward: 8.62
***************************
Episode 3723 finished (timesteps: 9022752/10000000)
Epsilon: 0.01, Episode reward: 1.0, Mean reward: 8.54
***************************
Episode 3724 finished (timesteps: 9025861/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 8.50
***************************
Episode 3725 finished (timesteps: 9028162/10000000)
Epsilon: 0.01, Episode reward: 17.0, Mean reward: 8.59
***************************
Episode 3726 finished (timesteps: 9032310/10000000)
Epsilon: 0.01, Episode reward: 7.0, Mean reward: 8.50
***************************
Episode 3727 finished (timesteps: 9034751/10000000)
Epsil

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-9200000.mp4
Episode 3779 finished (timesteps: 9202838/10000000)
Epsilon: 0.01, Episode reward: 19.0, Mean reward: 9.25
***************************
Episode 3780 finished (timesteps: 9206181/10000000)
Epsilon: 0.01, Episode reward: 5.0, Mean reward: 9.19
***************************
Episode 3781 finished (timesteps: 9209907/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: 9.19
***************************
Episode 3782 finished (timesteps: 9212754/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: 9.23
***************************
Episode 3783 finished (timesteps: 9216458/10000000)
Epsilon: 0.01, Episode reward: -7.0, Mean reward: 9.10
***************************
Episode 3784 finished (timesteps: 9218688/10000000)
Epsilon: 0.01, Episode reward: 16.0, Mean reward: 9.08
***************************
Episode 3785 finished (timesteps: 9221796/10000000)
Epsilon: 0.01, Episode re

Episode 3839 finished (timesteps: 9406553/10000000)
Epsilon: 0.01, Episode reward: 4.0, Mean reward: 9.86
***************************
Episode 3840 finished (timesteps: 9410680/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 9.90
***************************
Episode 3841 finished (timesteps: 9413801/10000000)
Epsilon: 0.01, Episode reward: -13.0, Mean reward: 9.78
***************************
Episode 3842 finished (timesteps: 9416225/10000000)
Epsilon: 0.01, Episode reward: 15.0, Mean reward: 9.79
***************************
Episode 3843 finished (timesteps: 9419847/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 9.80
***************************
Episode 3844 finished (timesteps: 9422491/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: 9.77
***************************
Episode 3845 finished (timesteps: 9424890/10000000)
Epsilon: 0.01, Episode reward: 17.0, Mean reward: 9.87
***************************
Episode 3846 finished (timesteps: 9427842/10000000)
Eps

Episode 3900 finished (timesteps: 9588744/10000000)
Epsilon: 0.01, Episode reward: 11.0, Mean reward: 9.59
***************************
Episode 3901 finished (timesteps: 9592002/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 9.89
***************************
Episode 3902 finished (timesteps: 9595285/10000000)
Epsilon: 0.01, Episode reward: 16.0, Mean reward: 9.92
***************************
Episode 3903 finished (timesteps: 9598586/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 9.90
***************************
Moviepy - Building video runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-9600000.mp4.
Moviepy - Writing video runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-9600000.mp4



                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-9600000.mp4




Episode 3904 finished (timesteps: 9601157/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: 9.93
***************************
Episode 3905 finished (timesteps: 9604037/10000000)
Epsilon: 0.01, Episode reward: 17.0, Mean reward: 10.07
***************************
Episode 3906 finished (timesteps: 9607305/10000000)
Epsilon: 0.01, Episode reward: -2.0, Mean reward: 9.98
***************************
Episode 3907 finished (timesteps: 9610445/10000000)
Epsilon: 0.01, Episode reward: 5.0, Mean reward: 9.90
***************************
Episode 3908 finished (timesteps: 9613245/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: 9.87
***************************
Episode 3909 finished (timesteps: 9617126/10000000)
Epsilon: 0.01, Episode reward: -1.0, Mean reward: 9.81
***************************
Episode 3910 finished (timesteps: 9621158/10000000)
Epsilon: 0.01, Episode reward: -4.0, Mean reward: 9.63
***************************
Episode 3911 finished (timesteps: 9624823/10000000)
Eps

Episode 3965 finished (timesteps: 9809712/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 10.58
***************************
Episode 3966 finished (timesteps: 9813075/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: 10.70
***************************
Episode 3967 finished (timesteps: 9815519/10000000)
Epsilon: 0.01, Episode reward: 16.0, Mean reward: 10.70
***************************
Episode 3968 finished (timesteps: 9819586/10000000)
Epsilon: 0.01, Episode reward: 10.0, Mean reward: 10.70
***************************
Episode 3969 finished (timesteps: 9822907/10000000)
Epsilon: 0.01, Episode reward: 16.0, Mean reward: 10.83
***************************
Episode 3970 finished (timesteps: 9828529/10000000)
Epsilon: 0.01, Episode reward: 3.0, Mean reward: 10.69
***************************
Episode 3971 finished (timesteps: 9832328/10000000)
Epsilon: 0.01, Episode reward: 13.0, Mean reward: 10.67
***************************
Episode 3972 finished (timesteps: 9834573/1000000

                                                                                

Moviepy - Done !
Moviepy - video ready runs/videos/CNN_DDQN_Pong-v5_11-04-2023_16:28:30/rl-video-step-10000000.mp4




Episode 4018 finished (timesteps: 10001915/10000000)
Epsilon: 0.01, Episode reward: 14.0, Mean reward: 11.59
***************************
Saving checkpoint...
Checkpoint saved into runs/checkpoints/CNN_DDQN_Pong-v5_11-04-2023_16:28:30
