# Dependencies 

In [1]:
## Setup


# import environment
import gym_super_mario_bros
# import joypad
from nes_py.wrappers import JoypadSpace
# import controls
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

## Preprocessing
from gym.wrappers import GrayScaleObservation, ResizeObservation
import gym
from gym.spaces import Box
from gym.wrappers import FrameStack

## modeling
import torch
from torch import nn
from torchvision import transforms as T

from PIL import Image
import numpy as np
from pathlib import Path
from collections import deque
import random, datetime, os, copy

## Agent training
import os

## logging
import numpy as np
import time, datetime
import matplotlib.pyplot as plt


In [2]:
# memory fix
torch.cuda.empty_cache()
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

In [3]:
CHECKPOINT_PATH = "./checkpoints_pytorch"
LOG_PATH = "./logs"

#### Agent actions
These are the possible actions that the agent can take in the environment.

In [4]:
SIMPLE_MOVEMENT

[['NOOP'],
 ['right'],
 ['right', 'A'],
 ['right', 'B'],
 ['right', 'A', 'B'],
 ['A'],
 ['left']]

# Setup environment
For this project we well be making use if the gym-super-mario-bros enviroment.

In [5]:

env = gym_super_mario_bros.make("SuperMarioBros-1-1-v0", render_mode='rgb', apply_api_compatibility=True)
env.action_space

  logger.warn(
  logger.warn(


Discrete(256)

In [6]:
env.reset()
next_state, reward, done, trunc, info = env.step(action=0)
print(f"{next_state.shape},\n {reward},\n {done},\n {info}")


(240, 256, 3),
 0.0,
 False,
 {'coins': 0, 'flag_get': False, 'life': 2, 'score': 0, 'stage': 1, 'status': 'small', 'time': 400, 'world': 1, 'x_pos': 40, 'y_pos': 79}


  if not isinstance(terminated, (bool, np.bool8)):


Simplify the action space to limit the number of possible actions. This will make it easier for the agent to learn the optimal policy as there will be less possible actions to choose from.

In [7]:
# wrap environment with controls
env = JoypadSpace(env, [["right"], ["right", "A"]])
env.action_space

Discrete(2)

The observation space serves as the input to the agent. The agent will use this information to learn the optimal policy.
In our case the observation space is a frame from the game.

In [8]:
env.observation_space.shape

(240, 256, 3)

Test the environment with random actions.

In [9]:
env_demo = env

# done = True # reset the env
# # loop X steps
# for step in range(20):
#     if done:
#         # start the env
#         env_demo.reset()
#     # pass a random action to the env
#     print(env_demo.action_space.sample())
#     state, reward, done, info = env_demo.step(env_demo.action_space.sample()) 
#     # render the env
#     env_demo.render()
# env_demo.close()


Each step that the agent takes in the enviroment will return a new state. This state is the observation space for the next step. In our case an image of the game:

In [10]:
# state.shape
# # plt.imshow(state)

The reward function form the enviroment assumes that the objective is to move as far to the right as possible, as fast as possible and without dying. The value of the reward reflects this. More on how this reward is calculated can be found here, in the gym-super-mario-bros documentation. https://pypi.org/project/gym-super-mario-bros/

In [11]:
# reward

To check wether or not the game is running we use the done variable

In [12]:
# done

Miscellaneous information about the environment is found in the info variable.

In [13]:
# info

# Preprocessing Environment
To make use of the data that the environment returns we need to preprocess it. The two steps we will take are:
1. Convert the image to grayscale - This will reduce the size of the observation space and make it easier for the agent to learn the optimal policy.
2. Frame stacking - This gives the agent a sense of motion and context and helps it understand the dynamics of the game.

### Wrap the environment:

In [14]:
class FrameSkip(gym.Wrapper):
    def __init__(self, env, skip=4):
        super().__init__(env)
        self._skip = skip

    def step(self, action):
        total_reward = 0.0

        for _ in range(self._skip):
            # accumulate reward
            state, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return state, total_reward, done, info

In [15]:
# frame skip
class SkipFrame(gym.Wrapper):
    def __init__(self, env, skip):
        """Return only every `skip`-th frame"""
        super().__init__(env)
        self._skip = skip

    def step(self, action):
        """Repeat action, and sum reward"""
        total_reward = 0.0
        for i in range(self._skip):
            # Accumulate reward and repeat the same action
            obs, reward, done, trunk, info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return obs, total_reward, done, trunk, info

In [16]:
# Grayscale
print("Input shape before grayscale: ", env.observation_space.shape)
class GrayScaleObservation(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)
        obs_shape = self.observation_space.shape[:2]
        self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8)

    def permute_orientation(self, observation):
        # permute [H, W, C] array to [C, H, W] tensor
        observation = np.transpose(observation, (2, 0, 1))
        observation = torch.tensor(observation.copy(), dtype=torch.float)
        return observation

    def observation(self, observation):
        observation = self.permute_orientation(observation)
        transform = T.Grayscale()
        observation = transform(observation)
        return observation
print("Input shape after grayscale: ", env.observation_space.shape)


Input shape before grayscale:  (240, 256, 3)
Input shape after grayscale:  (240, 256, 3)


In [17]:
# Wrap the environment with the wrapper
# env = DummyVecEnv([lambda: env])


In [18]:
# reize observation
class ResizeObservation(gym.ObservationWrapper):
    def __init__(self, env, shape):
        super().__init__(env)
        if isinstance(shape, int):
            self.shape = (shape, shape)
        else:
            self.shape = tuple(shape)

        obs_shape = self.shape + self.observation_space.shape[2:]
        self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8)

    def observation(self, observation):
        transforms = T.Compose(
            [T.Resize(self.shape), T.Normalize(0, 255)]
        )
        observation = transforms(observation).squeeze(0)
        return observation

In [19]:
# FrameStack
# env = VecFrameStack(env, 4, channels_order='last')

In [20]:
env = SkipFrame(env, skip=4)
env = GrayScaleObservation(env)
env = ResizeObservation(env, shape=84)
env = FrameStack(env, num_stack=4)

# Build the agent

In [21]:
class MarioNet(nn.Module):
    """
    Small CNN
    """
    def __init__(self, input_dim, output_dim):
        super().__init__()
        c, h, w= input_dim
        
        if h != 84:
            raise ValueError(f"Expecting input height: 84, got: {h}")
        if w != 84:
            raise ValueError(f"Expecting input width: 84, got: {w}")
        
        self.online = nn.Sequential(
            nn.Conv2d(c, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(3136, 512),
            nn.ReLU(),
            nn.Linear(512, output_dim),
        )

        self.target = copy.deepcopy(self.online)

        for p in self.target.parameters():
            p.requires_grad = False
    def forward(self, input, model):
        if model == "online":
            return self.online(input)
        elif model == "target":
            return self.target(input)

class MarioAgent:
    def __init__(self, state_dim, action_dim):
        self.state_dim = state_dim
        self.action_dim = action_dim

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.net = MarioNet(self.state_dim, self.action_dim).to(device=self.device)

        self.exploration_rate = 1
        self.exploration_rate_decay = 0.99999975
        self.exploration_rate_min = 0.1
        self.curr_step = 0
        self.save_every = 10000 
        self.memory = deque(maxlen=40000) # 40000 is the max length of deque beacause of the memory of the GPU
        self.batch_size = 32
        self.gamma = 0.9

        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=0.00025)
        self.loss_fn = torch.nn.SmoothL1Loss()

        self.burnin = 1e4 # min. experiences before training
        self.learn_every = 3 # no. of experiences between updates to Q_online
        self.sync_every = 1e4 # no. of experiences between Q_target & Q_online sync

    
    def act(self, state):
        """ Given a state, choose a epsilon-greedy action """
        if random.random() < self.exploration_rate:
            action_idx = random.randint(0, self.action_dim - 1)
        else:
            state = state[0].__array__() if isinstance(state, tuple) else state.__array__()
            state = torch.tensor(state).unsqueeze(0).to(device=self.device)
            action_values = self.net(state, model="online")
            action_idx = torch.argmax(action_values, axis=1).item()

        self.exploration_rate *= self.exploration_rate_decay
        self.exploration_rate = max(self.exploration_rate_min, self.exploration_rate)

        # Increment step
        self.curr_step += 1
        return action_idx

    def cache(self, state, next_state, action, reward, done):
        """ Add to memory """
        def is_first_tuple(x):
            return x[0] if isinstance(x, tuple) else x
        
        state = is_first_tuple(state).__array__()
        next_state = is_first_tuple(next_state).__array__()

        state = torch.tensor(state, device=self.device)
        next_state = torch.tensor(next_state, device=self.device)
        action = torch.tensor([action], device=self.device)
        reward = torch.tensor([reward], device=self.device)
        done = torch.tensor([done], device=self.device)

        self.memory.append((state, next_state, action, reward, done))
    
    def recall(self):
        """ Sample from memory """
        batch = random.sample(self.memory, self.batch_size)
        state, next_state, action, reward, done = map(torch.stack, zip(*batch))
        return state, next_state, action.squeeze(), reward.squeeze(), done.squeeze()
    

    def td_estimate(self, state, action):
        """ Estimate TD(0) """
        current_Q = self.net(state, model="online")[np.arange(0, self.batch_size), action]
        return current_Q
    
    @torch.no_grad()
    def td_target(self, reward, next_state, done):
        next_state_Q = self.net(next_state, model="online")
        best_action = torch.argmax(next_state_Q, axis=1)
        next_Q = self.net(next_state, model="target")[np.arange(0, self.batch_size), best_action]

        return (reward + (1 - done.float()) * self.gamma * next_Q).float()
    
    def update_Q(self, td_estimate, td_target):
        loss = self.loss_fn(td_estimate, td_target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step() # TODO: OOM here 
        return loss.item()
    
    def sync_Q_target(self):
        self.net.target.load_state_dict(self.net.online.state_dict())

    def save(self):
        save_path = (f"{CHECKPOINT_PATH}/ mario_net_{self.curr_step}.chkpt")
        torch.save(dict(model=self.net.state_dict(), exploration_rate=self.exploration_rate), save_path)
        print(f"MarioNet saved to {save_path} at step {self.curr_step}")

    def learn(self):
        """  """
        if self.curr_step % self.sync_every == 0:
            self.sync_Q_target()
        
        if self.curr_step % self.save_every == 0:
            self.save()

        if self.curr_step < self.burnin:
            return None, None
        
        if self.curr_step % self.learn_every != 0:
            return None, None
        
        # Sample from memory
        state, next_state, action, reward, done = self.recall()

        # Get TD Estimate
        td_est = self.td_estimate(state, action)

        # Get TD Target
        td_tgt = self.td_target(reward, next_state, done)

        # Backpropagate loss through Q_online
        loss = self.update_Q(td_est, td_tgt)

        return (td_est.mean().item(), loss)


In [22]:
class MetricLogger:
    def __init__(self, save_dir):
        self.save_dir = save_dir / "log"
        with open(self.save_dir, "w") as f:
            f.write(
                f"{'Episode':>8}{'Step':>8}{'Epsilon':>10}{'MeanReward':>15}"
                f"{'MeanLength':>15}{'MeanLoss':>15}{'MeanQValue':>15}\n"
                f"{'timeDelta':>15}{'Time':>20}\n"    
                )
        self.ep_rewards_plot = save_dir / "reward_plot.jpg"
        self.ep_lengths_plot = save_dir / "length_plot.jpg"
        self.ep_avg_losses_plot  = save_dir / "loss_plot.jpg"
        self.ep_avg_qs_plot  = save_dir / "q_plot.jpg"

        # Metrics
        self.ep_rewards = []
        self.ep_lengths = []
        self.ep_avg_losses = []
        self.ep_avg_qs  = []

        # Moving averages
        self.moving_avg_ep_rewards = []
        self.moving_avg_ep_lengths = []
        self.moving_avg_ep_avg_losses = []
        self.moving_avg_ep_avg_qs = []

        # Current episode
        self.init_episode()

        # Timing
        self.record_time = time.time()

    def log_step(self, reward, loss, q_value):
        self.curr_ep_reward += reward
        self.curr_ep_length += 1
        if loss:
            self.curr_ep_loss += loss
            self.curr_ep_q_value += q_value
            self.curr_ep_loss_length += 1
    
    def log_episode(self):
        #end of episode
        self.ep_rewards.append(self.curr_ep_reward)
        self.ep_lengths.append(self.curr_ep_length)
        if self.curr_ep_loss_length == 0:
            ep_avg_loss = 0
            ep_avg_q_value = 0
        else:
            ep_avg_loss = np.round(self.curr_ep_loss / self.curr_ep_loss_length, 5)
            ep_avg_q_value = np.round(self.curr_ep_q_value / self.curr_ep_loss_length, 5)
        self.ep_avg_losses.append(ep_avg_loss)
        self.ep_avg_qs.append(ep_avg_q_value)

        self.init_episode()

    def init_episode(self):
        self.curr_ep_reward = 0
        self.curr_ep_length = 0
        self.curr_ep_loss = 0
        self.curr_ep_q_value = 0
        self.curr_ep_loss_length = 0
    
    def record(self, episode, epsilon, step):
        mean_ep_reward = np.round(np.mean(self.ep_rewards[-100:]), 3)
        mean_ep_length = np.round(np.mean(self.ep_lengths[-100:]), 3)
        mean_ep_loss = np.round(np.mean(self.ep_avg_losses[-100:]), 3)
        mean_ep_q_value = np.round(np.mean(self.ep_avg_qs[-100:]), 3)

        self.moving_avg_ep_rewards.append(mean_ep_reward)
        self.moving_avg_ep_lengths.append(mean_ep_length)
        self.moving_avg_ep_avg_losses.append(mean_ep_loss)
        self.moving_avg_ep_avg_qs.append(mean_ep_q_value)

        last_record_time = self.record_time
        self.record_time = time.time()
        time_delta = np.round(self.record_time - last_record_time, 3)

        print(
            f"Episode {episode} - "
            f"Step {step} - "
            f"Epsilon {epsilon} - "
            f"Mean Reward {mean_ep_reward} - "
            f"Mean Length {mean_ep_length} - "
            f"Mean Loss {mean_ep_loss} - "
            f"Mean Q Value {mean_ep_q_value} - "
            f"Time Delta {time_delta}"
            f"Time {datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')}"
        )

        with open(self.save_dir, "a") as f:
            f.write(
                f"{episode:8d}{step:8d}{epsilon:10.3f}"
                f"{mean_ep_reward:15.3f}{mean_ep_length:15.3f}{mean_ep_loss:15.3f}{mean_ep_loss:15.3f}\n"
                f"{time_delta:15.3f}"
                f"{datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S'):>20}\n"
            )
        for metric in ["ep_rewards", "ep_lengths", "ep_avg_losses", "ep_avg_qs"]:
            plt.plot(getattr(self, f"moving_avg_{metric}"))
            plt.savefig(getattr(self, f"{metric}_plot"))
            plt.clf()
            

# Training the agent


In order to save the model we will use a callback function in order to keep good pratices and avoid losing the model in case of a crash or any other problem.

In [23]:
use_cuda = torch.cuda.is_available()
print(f"Using CUDA: {use_cuda}")

save_dir = Path(CHECKPOINT_PATH) / datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
save_dir.mkdir(parents=True)
# (240, 256, 3)
agent = MarioAgent(state_dim=(4, 84, 84), action_dim=env.action_space.n)
logger = MetricLogger(save_dir)

episodes = 40000
for e in range(episodes):
    state = env.reset()
    # print(e)
    while True:
        action = agent.act(state)
        # print(action)
        
        next_state, reward, done, trunc, info = env.step(action)

        agent.cache(state, next_state, action, reward, done)

        q, loss = agent.learn()

        logger.log_step(reward, loss, q)

        state = next_state

        if done or info["flag_get"]:
            break

    logger.log_episode()

    if e % 20 == 0:
        logger.record(episodes, agent.exploration_rate, agent.curr_step)

Using CUDA: True




Episode 40000 - Step 114 - Epsilon 0.9999715004025548 - Mean Reward 598.0 - Mean Length 114.0 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 0.556Time 2023-05-29T16:43:51




Episode 40000 - Step 4153 - Epsilon 0.9989622886652558 - Mean Reward 674.333 - Mean Length 197.762 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 22.302Time 2023-05-29T16:44:14




Episode 40000 - Step 9687 - Epsilon 0.9975811797684266 - Mean Reward 675.073 - Mean Length 236.268 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 34.329Time 2023-05-29T16:44:48




MarioNet saved to ./checkpoints_pytorch/ mario_net_10000.chkpt at step 10000
Episode 40000 - Step 12591 - Epsilon 0.9968571985782883 - Mean Reward 661.689 - Mean Length 206.41 - Mean Loss 0.325 - Mean Q Value 0.844 - Time Delta 22.068Time 2023-05-29T16:45:10




Episode 40000 - Step 16695 - Epsilon 0.9958349474691164 - Mean Reward 624.358 - Mean Length 206.111 - Mean Loss 0.36 - Mean Q Value 1.423 - Time Delta 31.092Time 2023-05-29T16:45:41




MarioNet saved to ./checkpoints_pytorch/ mario_net_20000.chkpt at step 20000
Episode 40000 - Step 21806 - Epsilon 0.9945633317823381 - Mean Reward 639.55 - Mean Length 216.92 - Mean Loss 0.394 - Mean Q Value 2.041 - Time Delta 38.518Time 2023-05-29T16:46:20




Episode 40000 - Step 26006 - Epsilon 0.9935195882146185 - Mean Reward 655.04 - Mean Length 218.53 - Mean Loss 0.498 - Mean Q Value 3.232 - Time Delta 32.118Time 2023-05-29T16:46:52




Episode 40000 - Step 29748 - Epsilon 0.9925905851325343 - Mean Reward 639.4 - Mean Length 200.61 - Mean Loss 0.592 - Mean Q Value 4.446 - Time Delta 29.231Time 2023-05-29T16:47:21




MarioNet saved to ./checkpoints_pytorch/ mario_net_30000.chkpt at step 30000
Episode 40000 - Step 34223 - Epsilon 0.9914807452089937 - Mean Reward 643.09 - Mean Length 216.32 - Mean Loss 0.513 - Mean Q Value 5.607 - Time Delta 35.43Time 2023-05-29T16:47:57




Episode 40000 - Step 39255 - Epsilon 0.9902342464876139 - Mean Reward 683.32 - Mean Length 225.6 - Mean Loss 0.519 - Mean Q Value 6.65 - Time Delta 39.496Time 2023-05-29T16:48:36




MarioNet saved to ./checkpoints_pytorch/ mario_net_40000.chkpt at step 40000
Episode 40000 - Step 43277 - Epsilon 0.9892390662389464 - Mean Reward 676.56 - Mean Length 214.71 - Mean Loss 0.547 - Mean Q Value 7.879 - Time Delta 30.26Time 2023-05-29T16:49:06




Episode 40000 - Step 48780 - Epsilon 0.9878790561548213 - Mean Reward 678.56 - Mean Length 227.74 - Mean Loss 0.554 - Mean Q Value 8.9 - Time Delta 40.365Time 2023-05-29T16:49:47




MarioNet saved to ./checkpoints_pytorch/ mario_net_50000.chkpt at step 50000
Episode 40000 - Step 52416 - Epsilon 0.9869814819897184 - Mean Reward 676.12 - Mean Length 226.68 - Mean Loss 0.599 - Mean Q Value 10.204 - Time Delta 26.946Time 2023-05-29T16:50:14




Episode 40000 - Step 55743 - Epsilon 0.9861609013456675 - Mean Reward 671.68 - Mean Length 215.2 - Mean Loss 0.612 - Mean Q Value 11.131 - Time Delta 24.076Time 2023-05-29T16:50:38




Episode 40000 - Step 58646 - Epsilon 0.9854454546311137 - Mean Reward 641.61 - Mean Length 193.91 - Mean Loss 0.646 - Mean Q Value 12.162 - Time Delta 22.584Time 2023-05-29T16:51:00




MarioNet saved to ./checkpoints_pytorch/ mario_net_60000.chkpt at step 60000
Episode 40000 - Step 62279 - Epsilon 0.9845508300178945 - Mean Reward 628.96 - Mean Length 190.02 - Mean Loss 0.661 - Mean Q Value 13.08 - Time Delta 26.562Time 2023-05-29T16:51:27




Episode 40000 - Step 66076 - Epsilon 0.9836166884626838 - Mean Reward 604.24 - Mean Length 172.96 - Mean Loss 0.697 - Mean Q Value 14.045 - Time Delta 29.907Time 2023-05-29T16:51:57




Episode 40000 - Step 69551 - Epsilon 0.9827625424310993 - Mean Reward 603.64 - Mean Length 171.35 - Mean Loss 0.705 - Mean Q Value 14.641 - Time Delta 27.262Time 2023-05-29T16:52:24




MarioNet saved to ./checkpoints_pytorch/ mario_net_70000.chkpt at step 70000
Episode 40000 - Step 74047 - Epsilon 0.9816585377621082 - Mean Reward 619.41 - Mean Length 183.04 - Mean Loss 0.737 - Mean Q Value 15.583 - Time Delta 35.651Time 2023-05-29T16:53:00




Episode 40000 - Step 77097 - Epsilon 0.9809103083321526 - Mean Reward 627.57 - Mean Length 184.51 - Mean Loss 0.765 - Mean Q Value 16.573 - Time Delta 22.997Time 2023-05-29T16:53:23




MarioNet saved to ./checkpoints_pytorch/ mario_net_80000.chkpt at step 80000
Episode 40000 - Step 80559 - Epsilon 0.9800616976433459 - Mean Reward 624.08 - Mean Length 182.8 - Mean Loss 0.789 - Mean Q Value 17.344 - Time Delta 25.298Time 2023-05-29T16:53:48




Episode 40000 - Step 84460 - Epsilon 0.9791063582762459 - Mean Reward 636.2 - Mean Length 183.84 - Mean Loss 0.828 - Mean Q Value 18.32 - Time Delta 32.566Time 2023-05-29T16:54:21




MarioNet saved to ./checkpoints_pytorch/ mario_net_90000.chkpt at step 90000
Episode 40000 - Step 90116 - Epsilon 0.9777228800621256 - Mean Reward 653.57 - Mean Length 205.65 - Mean Loss 0.848 - Mean Q Value 19.338 - Time Delta 178.616Time 2023-05-29T16:57:19




Episode 40000 - Step 94655 - Epsilon 0.9766140381333902 - Mean Reward 649.72 - Mean Length 206.08 - Mean Loss 0.866 - Mean Q Value 20.137 - Time Delta 267.788Time 2023-05-29T17:01:47




MarioNet saved to ./checkpoints_pytorch/ mario_net_100000.chkpt at step 100000
Episode 40000 - Step 100268 - Epsilon 0.9752445653940339 - Mean Reward 685.27 - Mean Length 231.71 - Mean Loss 0.873 - Mean Q Value 20.544 - Time Delta 199.844Time 2023-05-29T17:05:07




Episode 40000 - Step 105473 - Epsilon 0.9739763535538921 - Mean Reward 697.9 - Mean Length 249.14 - Mean Loss 0.883 - Mean Q Value 20.898 - Time Delta 248.414Time 2023-05-29T17:09:15




Episode 40000 - Step 109930 - Epsilon 0.9728917046634475 - Mean Reward 675.82 - Mean Length 254.7 - Mean Loss 0.864 - Mean Q Value 20.873 - Time Delta 31.765Time 2023-05-29T17:09:47




MarioNet saved to ./checkpoints_pytorch/ mario_net_110000.chkpt at step 110000
Episode 40000 - Step 113863 - Epsilon 0.9719355789075426 - Mean Reward 664.76 - Mean Length 237.47 - Mean Loss 0.885 - Mean Q Value 20.93 - Time Delta 29.475Time 2023-05-29T17:10:16




Episode 40000 - Step 119189 - Epsilon 0.9706423077087595 - Mean Reward 665.6 - Mean Length 245.34 - Mean Loss 0.878 - Mean Q Value 20.687 - Time Delta 42.06Time 2023-05-29T17:10:58




MarioNet saved to ./checkpoints_pytorch/ mario_net_120000.chkpt at step 120000
Episode 40000 - Step 123703 - Epsilon 0.9695475555575924 - Mean Reward 640.15 - Mean Length 234.35 - Mean Loss 0.889 - Mean Q Value 20.568 - Time Delta 32.695Time 2023-05-29T17:11:31




Episode 40000 - Step 127926 - Epsilon 0.968524495740514 - Mean Reward 634.71 - Mean Length 224.53 - Mean Loss 0.882 - Mean Q Value 20.5 - Time Delta 33.055Time 2023-05-29T17:12:04




MarioNet saved to ./checkpoints_pytorch/ mario_net_130000.chkpt at step 130000
Episode 40000 - Step 131549 - Epsilon 0.9676476517294453 - Mean Reward 656.75 - Mean Length 216.19 - Mean Loss 0.896 - Mean Q Value 20.564 - Time Delta 25.489Time 2023-05-29T17:12:30




Episode 40000 - Step 135625 - Epsilon 0.9666621208622598 - Mean Reward 661.88 - Mean Length 217.62 - Mean Loss 0.894 - Mean Q Value 20.662 - Time Delta 31.891Time 2023-05-29T17:13:02




MarioNet saved to ./checkpoints_pytorch/ mario_net_140000.chkpt at step 140000
Episode 40000 - Step 140910 - Epsilon 0.9653857617557566 - Mean Reward 651.05 - Mean Length 217.21 - Mean Loss 0.89 - Mean Q Value 20.908 - Time Delta 37.992Time 2023-05-29T17:13:40




Episode 40000 - Step 144352 - Epsilon 0.964555404516295 - Mean Reward 647.9 - Mean Length 206.49 - Mean Loss 0.889 - Mean Q Value 21.34 - Time Delta 26.868Time 2023-05-29T17:14:06




Episode 40000 - Step 147782 - Epsilon 0.9637286526739739 - Mean Reward 660.21 - Mean Length 198.56 - Mean Loss 0.906 - Mean Q Value 21.844 - Time Delta 26.469Time 2023-05-29T17:14:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_150000.chkpt at step 150000
Episode 40000 - Step 152202 - Epsilon 0.962664320531385 - Mean Reward 653.18 - Mean Length 206.53 - Mean Loss 0.927 - Mean Q Value 22.408 - Time Delta 33.98Time 2023-05-29T17:15:07




Episode 40000 - Step 157976 - Epsilon 0.9612757168767337 - Mean Reward 652.61 - Mean Length 223.51 - Mean Loss 0.943 - Mean Q Value 22.863 - Time Delta 44.085Time 2023-05-29T17:15:51




MarioNet saved to ./checkpoints_pytorch/ mario_net_160000.chkpt at step 160000
Episode 40000 - Step 161748 - Epsilon 0.9603696610348098 - Mean Reward 630.47 - Mean Length 208.38 - Mean Loss 0.979 - Mean Q Value 23.23 - Time Delta 29.537Time 2023-05-29T17:16:21




Episode 40000 - Step 166331 - Epsilon 0.9592699474767673 - Mean Reward 632.22 - Mean Length 219.79 - Mean Loss 1.004 - Mean Q Value 23.592 - Time Delta 35.438Time 2023-05-29T17:16:56




MarioNet saved to ./checkpoints_pytorch/ mario_net_170000.chkpt at step 170000
Episode 40000 - Step 170687 - Epsilon 0.9582258709761426 - Mean Reward 613.2 - Mean Length 229.05 - Mean Loss 1.014 - Mean Q Value 23.784 - Time Delta 33.578Time 2023-05-29T17:17:30




Episode 40000 - Step 175445 - Epsilon 0.957086738793246 - Mean Reward 629.44 - Mean Length 232.43 - Mean Loss 1.028 - Mean Q Value 23.711 - Time Delta 35.469Time 2023-05-29T17:18:05




MarioNet saved to ./checkpoints_pytorch/ mario_net_180000.chkpt at step 180000
Episode 40000 - Step 180193 - Epsilon 0.9559513506783747 - Mean Reward 642.48 - Mean Length 222.17 - Mean Loss 1.01 - Mean Q Value 23.561 - Time Delta 35.335Time 2023-05-29T17:18:40




Episode 40000 - Step 186256 - Epsilon 0.9545034648310922 - Mean Reward 683.9 - Mean Length 245.08 - Mean Loss 1.0 - Mean Q Value 23.581 - Time Delta 46.34Time 2023-05-29T17:19:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_190000.chkpt at step 190000
Episode 40000 - Step 191110 - Epsilon 0.9533458772399853 - Mean Reward 695.5 - Mean Length 247.79 - Mean Loss 0.982 - Mean Q Value 23.346 - Time Delta 35.941Time 2023-05-29T17:20:03




Episode 40000 - Step 194345 - Epsilon 0.9525751703619226 - Mean Reward 695.98 - Mean Length 236.58 - Mean Loss 0.97 - Mean Q Value 23.086 - Time Delta 25.288Time 2023-05-29T17:20:28




Episode 40000 - Step 198841 - Epsilon 0.951505077241522 - Mean Reward 681.48 - Mean Length 233.96 - Mean Loss 0.943 - Mean Q Value 23.133 - Time Delta 34.26Time 2023-05-29T17:21:02




MarioNet saved to ./checkpoints_pytorch/ mario_net_200000.chkpt at step 200000
Episode 40000 - Step 203463 - Epsilon 0.9504062479581411 - Mean Reward 671.84 - Mean Length 232.7 - Mean Loss 0.961 - Mean Q Value 23.369 - Time Delta 35.697Time 2023-05-29T17:21:38




Episode 40000 - Step 207757 - Epsilon 0.9493865341526607 - Mean Reward 643.19 - Mean Length 215.01 - Mean Loss 0.964 - Mean Q Value 23.47 - Time Delta 33.28Time 2023-05-29T17:22:11




MarioNet saved to ./checkpoints_pytorch/ mario_net_210000.chkpt at step 210000
Episode 40000 - Step 212187 - Epsilon 0.9483356704577618 - Mean Reward 613.03 - Mean Length 210.77 - Mean Loss 0.98 - Mean Q Value 23.791 - Time Delta 34.687Time 2023-05-29T17:22:46




Episode 40000 - Step 216557 - Epsilon 0.9473001793482161 - Mean Reward 625.09 - Mean Length 222.12 - Mean Loss 0.984 - Mean Q Value 24.127 - Time Delta 33.712Time 2023-05-29T17:23:20




MarioNet saved to ./checkpoints_pytorch/ mario_net_220000.chkpt at step 220000
Episode 40000 - Step 221696 - Epsilon 0.9460839167545761 - Mean Reward 633.35 - Mean Length 228.55 - Mean Loss 0.997 - Mean Q Value 24.329 - Time Delta 39.778Time 2023-05-29T17:23:59




Episode 40000 - Step 225069 - Epsilon 0.9452864676637073 - Mean Reward 632.12 - Mean Length 216.06 - Mean Loss 1.009 - Mean Q Value 24.593 - Time Delta 26.079Time 2023-05-29T17:24:25




Episode 40000 - Step 229795 - Epsilon 0.9441702710854516 - Mean Reward 647.55 - Mean Length 220.38 - Mean Loss 1.031 - Mean Q Value 24.919 - Time Delta 36.609Time 2023-05-29T17:25:02




MarioNet saved to ./checkpoints_pytorch/ mario_net_230000.chkpt at step 230000
Episode 40000 - Step 233077 - Epsilon 0.94339589701153 - Mean Reward 655.78 - Mean Length 208.9 - Mean Loss 1.064 - Mean Q Value 25.299 - Time Delta 24.26Time 2023-05-29T17:25:26




Episode 40000 - Step 238093 - Epsilon 0.94221361985144 - Mean Reward 653.62 - Mean Length 215.36 - Mean Loss 1.106 - Mean Q Value 25.68 - Time Delta 38.8Time 2023-05-29T17:26:05




MarioNet saved to ./checkpoints_pytorch/ mario_net_240000.chkpt at step 240000
Episode 40000 - Step 241252 - Epsilon 0.9414698003064851 - Mean Reward 620.93 - Mean Length 195.56 - Mean Loss 1.149 - Mean Q Value 26.027 - Time Delta 24.488Time 2023-05-29T17:26:30




Episode 40000 - Step 244992 - Epsilon 0.9405899373331341 - Mean Reward 609.59 - Mean Length 199.23 - Mean Loss 1.151 - Mean Q Value 26.193 - Time Delta 28.851Time 2023-05-29T17:26:58




Episode 40000 - Step 248558 - Epsilon 0.9397517749655028 - Mean Reward 588.53 - Mean Length 187.63 - Mean Loss 1.163 - Mean Q Value 26.457 - Time Delta 27.245Time 2023-05-29T17:27:26




MarioNet saved to ./checkpoints_pytorch/ mario_net_250000.chkpt at step 250000
Episode 40000 - Step 254681 - Epsilon 0.938314350205751 - Mean Reward 603.23 - Mean Length 216.04 - Mean Loss 1.124 - Mean Q Value 26.632 - Time Delta 45.724Time 2023-05-29T17:28:11




Episode 40000 - Step 258610 - Epsilon 0.9373931433217806 - Mean Reward 608.88 - Mean Length 205.17 - Mean Loss 1.11 - Mean Q Value 26.732 - Time Delta 48.396Time 2023-05-29T17:29:00




MarioNet saved to ./checkpoints_pytorch/ mario_net_260000.chkpt at step 260000
Episode 40000 - Step 262703 - Epsilon 0.9364344462449495 - Mean Reward 639.86 - Mean Length 214.51 - Mean Loss 1.077 - Mean Q Value 26.874 - Time Delta 72.935Time 2023-05-29T17:30:13




Episode 40000 - Step 267175 - Epsilon 0.9353880974210841 - Mean Reward 646.39 - Mean Length 221.83 - Mean Loss 1.077 - Mean Q Value 27.109 - Time Delta 76.163Time 2023-05-29T17:31:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_270000.chkpt at step 270000
Episode 40000 - Step 270864 - Epsilon 0.9345258333129879 - Mean Reward 676.12 - Mean Length 223.06 - Mean Loss 1.057 - Mean Q Value 27.105 - Time Delta 60.351Time 2023-05-29T17:32:29




Episode 40000 - Step 274414 - Epsilon 0.9336968094659265 - Mean Reward 663.05 - Mean Length 197.33 - Mean Loss 1.083 - Mean Q Value 27.286 - Time Delta 62.281Time 2023-05-29T17:33:32




Episode 40000 - Step 279276 - Epsilon 0.9325625903134196 - Mean Reward 679.69 - Mean Length 206.66 - Mean Loss 1.09 - Mean Q Value 27.501 - Time Delta 82.521Time 2023-05-29T17:34:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_280000.chkpt at step 280000
Episode 40000 - Step 282115 - Epsilon 0.9319009387635147 - Mean Reward 647.57 - Mean Length 194.12 - Mean Loss 1.095 - Mean Q Value 27.672 - Time Delta 47.954Time 2023-05-29T17:35:42




Episode 40000 - Step 287002 - Epsilon 0.930763083877832 - Mean Reward 652.19 - Mean Length 198.27 - Mean Loss 1.092 - Mean Q Value 27.834 - Time Delta 82.962Time 2023-05-29T17:37:05




MarioNet saved to ./checkpoints_pytorch/ mario_net_290000.chkpt at step 290000
Episode 40000 - Step 290536 - Epsilon 0.9299411177473713 - Mean Reward 651.2 - Mean Length 196.72 - Mean Loss 1.106 - Mean Q Value 27.989 - Time Delta 62.602Time 2023-05-29T17:38:08




Episode 40000 - Step 295442 - Epsilon 0.9288012439942772 - Mean Reward 661.73 - Mean Length 210.28 - Mean Loss 1.103 - Mean Q Value 28.225 - Time Delta 84.444Time 2023-05-29T17:39:32




Episode 40000 - Step 299821 - Epsilon 0.9277849950759886 - Mean Reward 647.62 - Mean Length 205.45 - Mean Loss 1.089 - Mean Q Value 28.538 - Time Delta 74.022Time 2023-05-29T17:40:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_300000.chkpt at step 300000
Episode 40000 - Step 304266 - Epsilon 0.9267545665091723 - Mean Reward 683.57 - Mean Length 221.51 - Mean Loss 1.109 - Mean Q Value 28.944 - Time Delta 77.09Time 2023-05-29T17:42:03




Episode 40000 - Step 308725 - Epsilon 0.9257220423370313 - Mean Reward 700.58 - Mean Length 217.23 - Mean Loss 1.124 - Mean Q Value 29.243 - Time Delta 76.553Time 2023-05-29T17:43:20




MarioNet saved to ./checkpoints_pytorch/ mario_net_310000.chkpt at step 310000
Episode 40000 - Step 313141 - Epsilon 0.9247006090094045 - Mean Reward 696.51 - Mean Length 226.05 - Mean Loss 1.124 - Mean Q Value 29.515 - Time Delta 79.325Time 2023-05-29T17:44:39




Episode 40000 - Step 316471 - Epsilon 0.9239311160020155 - Mean Reward 681.64 - Mean Length 210.29 - Mean Loss 1.117 - Mean Q Value 29.394 - Time Delta 57.672Time 2023-05-29T17:45:37




Episode 40000 - Step 319906 - Epsilon 0.9231380306366724 - Mean Reward 673.97 - Mean Length 200.85 - Mean Loss 1.135 - Mean Q Value 29.328 - Time Delta 62.263Time 2023-05-29T17:46:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_320000.chkpt at step 320000
Episode 40000 - Step 324022 - Epsilon 0.9221886100450041 - Mean Reward 651.51 - Mean Length 197.56 - Mean Loss 1.125 - Mean Q Value 29.263 - Time Delta 73.984Time 2023-05-29T17:47:53




Episode 40000 - Step 328473 - Epsilon 0.92116301526196 - Mean Reward 637.16 - Mean Length 197.48 - Mean Loss 1.121 - Mean Q Value 29.165 - Time Delta 80.89Time 2023-05-29T17:49:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_330000.chkpt at step 330000
Episode 40000 - Step 332511 - Epsilon 0.9202335702980194 - Mean Reward 619.86 - Mean Length 193.7 - Mean Loss 1.107 - Mean Q Value 29.013 - Time Delta 73.132Time 2023-05-29T17:50:27




Episode 40000 - Step 336552 - Epsilon 0.9193043736568284 - Mean Reward 631.97 - Mean Length 200.81 - Mean Loss 1.107 - Mean Q Value 29.062 - Time Delta 72.531Time 2023-05-29T17:51:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_340000.chkpt at step 340000
Episode 40000 - Step 341918 - Epsilon 0.9180719535159727 - Mean Reward 665.19 - Mean Length 220.12 - Mean Loss 1.098 - Mean Q Value 29.145 - Time Delta 96.767Time 2023-05-29T17:53:16




Episode 40000 - Step 346871 - Epsilon 0.9169358543099417 - Mean Reward 720.84 - Mean Length 228.49 - Mean Loss 1.102 - Mean Q Value 29.001 - Time Delta 86.767Time 2023-05-29T17:54:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_350000.chkpt at step 350000
Episode 40000 - Step 350546 - Epsilon 0.9160938062634706 - Mean Reward 714.43 - Mean Length 220.73 - Mean Loss 1.105 - Mean Q Value 29.015 - Time Delta 64.244Time 2023-05-29T17:55:47




Episode 40000 - Step 354735 - Epsilon 0.9151349290845671 - Mean Reward 732.67 - Mean Length 222.24 - Mean Loss 1.132 - Mean Q Value 29.282 - Time Delta 73.875Time 2023-05-29T17:57:01




Episode 40000 - Step 359878 - Epsilon 0.9139590503074099 - Mean Reward 753.59 - Mean Length 233.26 - Mean Loss 1.14 - Mean Q Value 29.315 - Time Delta 92.926Time 2023-05-29T17:58:34




MarioNet saved to ./checkpoints_pytorch/ mario_net_360000.chkpt at step 360000
Episode 40000 - Step 363604 - Epsilon 0.9131080937401126 - Mean Reward 726.5 - Mean Length 216.86 - Mean Loss 1.171 - Mean Q Value 29.172 - Time Delta 82.395Time 2023-05-29T17:59:56




Episode 40000 - Step 368670 - Epsilon 0.9119523742086433 - Mean Reward 703.49 - Mean Length 217.99 - Mean Loss 1.173 - Mean Q Value 29.13 - Time Delta 88.722Time 2023-05-29T18:01:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_370000.chkpt at step 370000
Episode 40000 - Step 373121 - Epsilon 0.9109381634634813 - Mean Reward 723.12 - Mean Length 225.75 - Mean Loss 1.199 - Mean Q Value 29.113 - Time Delta 75.355Time 2023-05-29T18:02:41




Episode 40000 - Step 376392 - Epsilon 0.9101935481831962 - Mean Reward 708.98 - Mean Length 216.57 - Mean Loss 1.209 - Mean Q Value 29.079 - Time Delta 56.422Time 2023-05-29T18:03:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_380000.chkpt at step 380000
Episode 40000 - Step 380987 - Epsilon 0.9091485635419236 - Mean Reward 704.89 - Mean Length 211.09 - Mean Loss 1.233 - Mean Q Value 29.196 - Time Delta 81.129Time 2023-05-29T18:04:58




Episode 40000 - Step 387067 - Epsilon 0.9077677072691686 - Mean Reward 705.19 - Mean Length 234.63 - Mean Loss 1.206 - Mean Q Value 29.307 - Time Delta 105.825Time 2023-05-29T18:06:44




MarioNet saved to ./checkpoints_pytorch/ mario_net_390000.chkpt at step 390000
Episode 40000 - Step 391054 - Epsilon 0.9068633404824508 - Mean Reward 673.06 - Mean Length 223.84 - Mean Loss 1.178 - Mean Q Value 29.246 - Time Delta 70.774Time 2023-05-29T18:07:55




Episode 40000 - Step 395005 - Epsilon 0.9059680283509206 - Mean Reward 664.49 - Mean Length 218.84 - Mean Loss 1.139 - Mean Q Value 28.97 - Time Delta 69.338Time 2023-05-29T18:09:04




Episode 40000 - Step 399514 - Epsilon 0.9049473511505026 - Mean Reward 671.58 - Mean Length 231.22 - Mean Loss 1.118 - Mean Q Value 28.698 - Time Delta 78.469Time 2023-05-29T18:10:22




MarioNet saved to ./checkpoints_pytorch/ mario_net_400000.chkpt at step 400000
Episode 40000 - Step 402868 - Epsilon 0.9041888707389979 - Mean Reward 650.78 - Mean Length 218.81 - Mean Loss 1.08 - Mean Q Value 28.431 - Time Delta 59.539Time 2023-05-29T18:11:22




Episode 40000 - Step 405763 - Epsilon 0.9035347007182714 - Mean Reward 619.67 - Mean Length 186.96 - Mean Loss 1.067 - Mean Q Value 28.369 - Time Delta 50.695Time 2023-05-29T18:12:13




MarioNet saved to ./checkpoints_pytorch/ mario_net_410000.chkpt at step 410000
Episode 40000 - Step 410841 - Epsilon 0.9023883910462785 - Mean Reward 646.68 - Mean Length 197.87 - Mean Loss 1.085 - Mean Q Value 28.414 - Time Delta 81.96Time 2023-05-29T18:13:35




Episode 40000 - Step 413908 - Epsilon 0.901696749852721 - Mean Reward 623.18 - Mean Length 189.03 - Mean Loss 1.061 - Mean Q Value 28.585 - Time Delta 31.372Time 2023-05-29T18:14:06




Episode 40000 - Step 417101 - Epsilon 0.9009772575376884 - Mean Reward 623.71 - Mean Length 175.87 - Mean Loss 1.025 - Mean Q Value 28.804 - Time Delta 33.016Time 2023-05-29T18:14:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_420000.chkpt at step 420000
Episode 40000 - Step 421416 - Epsilon 0.9000058522454908 - Mean Reward 631.03 - Mean Length 185.48 - Mean Loss 1.024 - Mean Q Value 28.909 - Time Delta 44.516Time 2023-05-29T18:15:24




Episode 40000 - Step 425645 - Epsilon 0.8990548237652111 - Mean Reward 663.57 - Mean Length 198.82 - Mean Loss 1.023 - Mean Q Value 29.035 - Time Delta 43.751Time 2023-05-29T18:16:07




Episode 40000 - Step 429904 - Epsilon 0.8980980644670031 - Mean Reward 647.15 - Mean Length 190.63 - Mean Loss 1.007 - Mean Q Value 29.371 - Time Delta 44.048Time 2023-05-29T18:16:51




MarioNet saved to ./checkpoints_pytorch/ mario_net_430000.chkpt at step 430000
Episode 40000 - Step 433900 - Epsilon 0.8972013123906238 - Mean Reward 674.77 - Mean Length 199.92 - Mean Loss 1.024 - Mean Q Value 29.833 - Time Delta 41.218Time 2023-05-29T18:17:33




Episode 40000 - Step 437555 - Epsilon 0.8963818690290855 - Mean Reward 655.61 - Mean Length 204.54 - Mean Loss 1.042 - Mean Q Value 30.075 - Time Delta 38.04Time 2023-05-29T18:18:11




MarioNet saved to ./checkpoints_pytorch/ mario_net_440000.chkpt at step 440000
Episode 40000 - Step 440963 - Epsilon 0.8956184768323963 - Mean Reward 646.29 - Mean Length 195.47 - Mean Loss 1.064 - Mean Q Value 30.489 - Time Delta 35.521Time 2023-05-29T18:18:46




Episode 40000 - Step 444888 - Epsilon 0.8947400821249634 - Mean Reward 639.01 - Mean Length 192.43 - Mean Loss 1.089 - Mean Q Value 30.742 - Time Delta 40.606Time 2023-05-29T18:19:27




Episode 40000 - Step 449105 - Epsilon 0.8937972993271063 - Mean Reward 643.53 - Mean Length 192.01 - Mean Loss 1.095 - Mean Q Value 30.892 - Time Delta 43.45Time 2023-05-29T18:20:10




MarioNet saved to ./checkpoints_pytorch/ mario_net_450000.chkpt at step 450000
Episode 40000 - Step 452987 - Epsilon 0.8929302897240684 - Mean Reward 617.86 - Mean Length 190.87 - Mean Loss 1.108 - Mean Q Value 30.762 - Time Delta 40.14Time 2023-05-29T18:20:50




Episode 40000 - Step 457717 - Epsilon 0.8918750235734922 - Mean Reward 636.85 - Mean Length 201.62 - Mean Loss 1.122 - Mean Q Value 30.736 - Time Delta 48.39Time 2023-05-29T18:21:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_460000.chkpt at step 460000
Episode 40000 - Step 461499 - Epsilon 0.8910321541625342 - Mean Reward 639.79 - Mean Length 205.36 - Mean Loss 1.105 - Mean Q Value 30.554 - Time Delta 39.223Time 2023-05-29T18:22:18




Episode 40000 - Step 464743 - Epsilon 0.8903098199412345 - Mean Reward 643.13 - Mean Length 198.55 - Mean Loss 1.092 - Mean Q Value 30.588 - Time Delta 33.666Time 2023-05-29T18:22:52




Episode 40000 - Step 467322 - Epsilon 0.8897359776253126 - Mean Reward 617.11 - Mean Length 182.17 - Mean Loss 1.117 - Mean Q Value 30.536 - Time Delta 26.857Time 2023-05-29T18:23:18




MarioNet saved to ./checkpoints_pytorch/ mario_net_470000.chkpt at step 470000
Episode 40000 - Step 471166 - Epsilon 0.888881351957271 - Mean Reward 626.37 - Mean Length 181.79 - Mean Loss 1.13 - Mean Q Value 30.678 - Time Delta 39.934Time 2023-05-29T18:23:58




Episode 40000 - Step 475427 - Epsilon 0.887934975132117 - Mean Reward 655.16 - Mean Length 177.1 - Mean Loss 1.137 - Mean Q Value 30.883 - Time Delta 44.413Time 2023-05-29T18:24:43




Episode 40000 - Step 479490 - Epsilon 0.8870335129759177 - Mean Reward 665.12 - Mean Length 179.91 - Mean Loss 1.151 - Mean Q Value 31.226 - Time Delta 41.996Time 2023-05-29T18:25:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_480000.chkpt at step 480000
Episode 40000 - Step 483767 - Epsilon 0.8860855591636674 - Mean Reward 649.91 - Mean Length 190.24 - Mean Loss 1.149 - Mean Q Value 31.32 - Time Delta 44.179Time 2023-05-29T18:26:09




Episode 40000 - Step 487385 - Epsilon 0.8852844570282123 - Mean Reward 668.33 - Mean Length 200.63 - Mean Loss 1.138 - Mean Q Value 31.473 - Time Delta 37.479Time 2023-05-29T18:26:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_490000.chkpt at step 490000
Episode 40000 - Step 490984 - Epsilon 0.8844882804720072 - Mean Reward 681.63 - Mean Length 198.18 - Mean Loss 1.124 - Mean Q Value 31.592 - Time Delta 37.463Time 2023-05-29T18:27:24




Episode 40000 - Step 494242 - Epsilon 0.8837681579871407 - Mean Reward 643.57 - Mean Length 188.15 - Mean Loss 1.112 - Mean Q Value 31.705 - Time Delta 33.784Time 2023-05-29T18:27:58




Episode 40000 - Step 498103 - Epsilon 0.8829155122402979 - Mean Reward 620.36 - Mean Length 186.13 - Mean Loss 1.095 - Mean Q Value 31.954 - Time Delta 40.144Time 2023-05-29T18:28:38




MarioNet saved to ./checkpoints_pytorch/ mario_net_500000.chkpt at step 500000
Episode 40000 - Step 503650 - Epsilon 0.8816919775650172 - Mean Reward 659.62 - Mean Length 198.83 - Mean Loss 1.088 - Mean Q Value 32.149 - Time Delta 57.175Time 2023-05-29T18:29:35




Episode 40000 - Step 506707 - Epsilon 0.881018401809787 - Mean Reward 654.76 - Mean Length 193.22 - Mean Loss 1.084 - Mean Q Value 32.251 - Time Delta 31.786Time 2023-05-29T18:30:07




MarioNet saved to ./checkpoints_pytorch/ mario_net_510000.chkpt at step 510000
Episode 40000 - Step 510363 - Epsilon 0.8802135187773877 - Mean Reward 662.6 - Mean Length 193.79 - Mean Loss 1.069 - Mean Q Value 32.338 - Time Delta 38.109Time 2023-05-29T18:30:45




Episode 40000 - Step 513878 - Epsilon 0.8794403708026525 - Mean Reward 655.48 - Mean Length 196.36 - Mean Loss 1.063 - Mean Q Value 32.354 - Time Delta 36.266Time 2023-05-29T18:31:21




Episode 40000 - Step 518517 - Epsilon 0.8784210309090289 - Mean Reward 693.97 - Mean Length 204.14 - Mean Loss 1.071 - Mean Q Value 32.167 - Time Delta 48.062Time 2023-05-29T18:32:09




MarioNet saved to ./checkpoints_pytorch/ mario_net_520000.chkpt at step 520000
Episode 40000 - Step 523420 - Epsilon 0.8773449658238509 - Mean Reward 690.19 - Mean Length 197.7 - Mean Loss 1.066 - Mean Q Value 32.006 - Time Delta 50.649Time 2023-05-29T18:33:00




Episode 40000 - Step 527488 - Epsilon 0.8764531594424186 - Mean Reward 687.31 - Mean Length 207.81 - Mean Loss 1.061 - Mean Q Value 31.739 - Time Delta 41.637Time 2023-05-29T18:33:42




MarioNet saved to ./checkpoints_pytorch/ mario_net_530000.chkpt at step 530000
Episode 40000 - Step 530830 - Epsilon 0.8757211885595226 - Mean Reward 662.24 - Mean Length 204.67 - Mean Loss 1.054 - Mean Q Value 31.519 - Time Delta 34.532Time 2023-05-29T18:34:16




Episode 40000 - Step 534034 - Epsilon 0.8750200166565787 - Mean Reward 674.84 - Mean Length 201.56 - Mean Loss 1.042 - Mean Q Value 31.34 - Time Delta 33.181Time 2023-05-29T18:34:49




Episode 40000 - Step 537581 - Epsilon 0.874244436483432 - Mean Reward 660.46 - Mean Length 190.64 - Mean Loss 1.042 - Mean Q Value 31.247 - Time Delta 36.427Time 2023-05-29T18:35:26




MarioNet saved to ./checkpoints_pytorch/ mario_net_540000.chkpt at step 540000
Episode 40000 - Step 541730 - Epsilon 0.8733380964601232 - Mean Reward 656.29 - Mean Length 183.1 - Mean Loss 1.05 - Mean Q Value 31.396 - Time Delta 43.036Time 2023-05-29T18:36:09




Episode 40000 - Step 545195 - Epsilon 0.8725818948165576 - Mean Reward 672.34 - Mean Length 177.07 - Mean Loss 1.069 - Mean Q Value 31.778 - Time Delta 36.057Time 2023-05-29T18:36:45




Episode 40000 - Step 548875 - Epsilon 0.8717794885363994 - Mean Reward 683.68 - Mean Length 180.45 - Mean Loss 1.094 - Mean Q Value 32.069 - Time Delta 38.06Time 2023-05-29T18:37:23




MarioNet saved to ./checkpoints_pytorch/ mario_net_550000.chkpt at step 550000
Episode 40000 - Step 553212 - Epsilon 0.8708347737537853 - Mean Reward 680.93 - Mean Length 191.78 - Mean Loss 1.119 - Mean Q Value 32.293 - Time Delta 44.903Time 2023-05-29T18:38:08




Episode 40000 - Step 557163 - Epsilon 0.8699750312736401 - Mean Reward 681.67 - Mean Length 195.82 - Mean Loss 1.111 - Mean Q Value 32.37 - Time Delta 40.858Time 2023-05-29T18:38:49




MarioNet saved to ./checkpoints_pytorch/ mario_net_560000.chkpt at step 560000
Episode 40000 - Step 560455 - Epsilon 0.8692593362822256 - Mean Reward 659.53 - Mean Length 187.25 - Mean Loss 1.13 - Mean Q Value 32.359 - Time Delta 34.077Time 2023-05-29T18:39:23




Episode 40000 - Step 566528 - Epsilon 0.86794058448067 - Mean Reward 680.86 - Mean Length 213.33 - Mean Loss 1.119 - Mean Q Value 32.392 - Time Delta 62.747Time 2023-05-29T18:40:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_570000.chkpt at step 570000
Episode 40000 - Step 570503 - Epsilon 0.8670784968377048 - Mean Reward 672.08 - Mean Length 216.28 - Mean Loss 1.129 - Mean Q Value 32.319 - Time Delta 41.241Time 2023-05-29T18:41:07




Episode 40000 - Step 574916 - Epsilon 0.8661224198593656 - Mean Reward 702.46 - Mean Length 217.04 - Mean Loss 1.13 - Mean Q Value 32.107 - Time Delta 45.65Time 2023-05-29T18:41:52




Episode 40000 - Step 578854 - Epsilon 0.8652701418330576 - Mean Reward 697.49 - Mean Length 216.91 - Mean Loss 1.151 - Mean Q Value 31.942 - Time Delta 40.949Time 2023-05-29T18:42:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_580000.chkpt at step 580000
Episode 40000 - Step 582606 - Epsilon 0.8644588988709321 - Mean Reward 702.81 - Mean Length 221.51 - Mean Loss 1.147 - Mean Q Value 31.618 - Time Delta 38.957Time 2023-05-29T18:43:12




Episode 40000 - Step 587109 - Epsilon 0.8634862817084172 - Mean Reward 671.29 - Mean Length 205.81 - Mean Loss 1.153 - Mean Q Value 31.158 - Time Delta 45.341Time 2023-05-29T18:43:58




MarioNet saved to ./checkpoints_pytorch/ mario_net_590000.chkpt at step 590000
Episode 40000 - Step 590801 - Epsilon 0.8626896514721183 - Mean Reward 689.05 - Mean Length 202.98 - Mean Loss 1.142 - Mean Q Value 30.844 - Time Delta 38.387Time 2023-05-29T18:44:36




Episode 40000 - Step 595359 - Epsilon 0.8617071763628207 - Mean Reward 682.35 - Mean Length 204.43 - Mean Loss 1.142 - Mean Q Value 30.675 - Time Delta 46.796Time 2023-05-29T18:45:23




Episode 40000 - Step 599493 - Epsilon 0.8608170619305213 - Mean Reward 689.25 - Mean Length 206.39 - Mean Loss 1.151 - Mean Q Value 30.499 - Time Delta 42.587Time 2023-05-29T18:46:05




MarioNet saved to ./checkpoints_pytorch/ mario_net_600000.chkpt at step 600000
Episode 40000 - Step 603526 - Epsilon 0.8599495804118826 - Mean Reward 687.84 - Mean Length 209.2 - Mean Loss 1.159 - Mean Q Value 30.365 - Time Delta 41.721Time 2023-05-29T18:46:47




Episode 40000 - Step 606924 - Epsilon 0.8592193633555508 - Mean Reward 680.74 - Mean Length 198.15 - Mean Loss 1.159 - Mean Q Value 30.457 - Time Delta 35.144Time 2023-05-29T18:47:22




MarioNet saved to ./checkpoints_pytorch/ mario_net_610000.chkpt at step 610000
Episode 40000 - Step 611454 - Epsilon 0.8582468480955594 - Mean Reward 684.58 - Mean Length 206.53 - Mean Loss 1.17 - Mean Q Value 30.654 - Time Delta 46.956Time 2023-05-29T18:48:09




Episode 40000 - Step 616440 - Epsilon 0.8571777097445237 - Mean Reward 691.99 - Mean Length 210.81 - Mean Loss 1.172 - Mean Q Value 30.875 - Time Delta 51.875Time 2023-05-29T18:49:01




MarioNet saved to ./checkpoints_pytorch/ mario_net_620000.chkpt at step 620000
Episode 40000 - Step 620836 - Epsilon 0.8562361887842244 - Mean Reward 689.08 - Mean Length 213.43 - Mean Loss 1.171 - Mean Q Value 31.03 - Time Delta 45.625Time 2023-05-29T18:49:47




Episode 40000 - Step 625426 - Epsilon 0.8552542211463966 - Mean Reward 687.54 - Mean Length 219.0 - Mean Loss 1.169 - Mean Q Value 31.258 - Time Delta 47.508Time 2023-05-29T18:50:34




Episode 40000 - Step 629496 - Epsilon 0.8543844424424952 - Mean Reward 703.65 - Mean Length 225.72 - Mean Loss 1.157 - Mean Q Value 31.157 - Time Delta 41.948Time 2023-05-29T18:51:16




MarioNet saved to ./checkpoints_pytorch/ mario_net_630000.chkpt at step 630000
Episode 40000 - Step 633185 - Epsilon 0.8535968495264994 - Mean Reward 688.83 - Mean Length 217.31 - Mean Loss 1.155 - Mean Q Value 30.878 - Time Delta 37.946Time 2023-05-29T18:51:54




Episode 40000 - Step 639611 - Epsilon 0.8522266469261969 - Mean Reward 695.23 - Mean Length 231.71 - Mean Loss 1.135 - Mean Q Value 30.403 - Time Delta 66.511Time 2023-05-29T18:53:01




MarioNet saved to ./checkpoints_pytorch/ mario_net_640000.chkpt at step 640000
Episode 40000 - Step 643791 - Epsilon 0.8513365351331827 - Mean Reward 713.88 - Mean Length 229.55 - Mean Loss 1.113 - Mean Q Value 30.132 - Time Delta 43.351Time 2023-05-29T18:53:44




Episode 40000 - Step 649530 - Epsilon 0.8501159557092013 - Mean Reward 742.22 - Mean Length 241.04 - Mean Loss 1.089 - Mean Q Value 29.71 - Time Delta 59.461Time 2023-05-29T18:54:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_650000.chkpt at step 650000
Episode 40000 - Step 654936 - Epsilon 0.8489677998924412 - Mean Reward 754.07 - Mean Length 254.4 - Mean Loss 1.085 - Mean Q Value 29.254 - Time Delta 55.755Time 2023-05-29T18:55:39




Episode 40000 - Step 659682 - Epsilon 0.8479610968165924 - Mean Reward 774.15 - Mean Length 264.97 - Mean Loss 1.064 - Mean Q Value 28.865 - Time Delta 49.302Time 2023-05-29T18:56:28




MarioNet saved to ./checkpoints_pytorch/ mario_net_660000.chkpt at step 660000
Episode 40000 - Step 663416 - Epsilon 0.8471698943848938 - Mean Reward 730.49 - Mean Length 238.05 - Mean Loss 1.09 - Mean Q Value 28.687 - Time Delta 38.808Time 2023-05-29T18:57:07




Episode 40000 - Step 668754 - Epsilon 0.8460401000423978 - Mean Reward 719.65 - Mean Length 249.63 - Mean Loss 1.075 - Mean Q Value 28.412 - Time Delta 54.949Time 2023-05-29T18:58:02




MarioNet saved to ./checkpoints_pytorch/ mario_net_670000.chkpt at step 670000
Episode 40000 - Step 673714 - Epsilon 0.844991660354088 - Mean Reward 701.15 - Mean Length 241.84 - Mean Loss 1.086 - Mean Q Value 28.079 - Time Delta 51.507Time 2023-05-29T18:58:54




Episode 40000 - Step 676283 - Epsilon 0.8444491386282741 - Mean Reward 666.49 - Mean Length 213.47 - Mean Loss 1.093 - Mean Q Value 28.074 - Time Delta 26.814Time 2023-05-29T18:59:21




Episode 40000 - Step 679685 - Epsilon 0.8437312398757538 - Mean Reward 635.95 - Mean Length 200.03 - Mean Loss 1.111 - Mean Q Value 28.269 - Time Delta 35.599Time 2023-05-29T18:59:56




MarioNet saved to ./checkpoints_pytorch/ mario_net_680000.chkpt at step 680000
Episode 40000 - Step 683578 - Epsilon 0.8429104778119613 - Mean Reward 661.02 - Mean Length 201.62 - Mean Loss 1.107 - Mean Q Value 28.454 - Time Delta 40.617Time 2023-05-29T19:00:37




Episode 40000 - Step 686573 - Epsilon 0.8422795847328179 - Mean Reward 628.72 - Mean Length 178.19 - Mean Loss 1.131 - Mean Q Value 28.608 - Time Delta 31.023Time 2023-05-29T19:01:08




MarioNet saved to ./checkpoints_pytorch/ mario_net_690000.chkpt at step 690000
Episode 40000 - Step 691529 - Epsilon 0.8412366464305159 - Mean Reward 649.63 - Mean Length 178.15 - Mean Loss 1.142 - Mean Q Value 29.065 - Time Delta 51.191Time 2023-05-29T19:01:59




Episode 40000 - Step 695069 - Epsilon 0.8404924812469634 - Mean Reward 649.69 - Mean Length 187.86 - Mean Loss 1.155 - Mean Q Value 29.51 - Time Delta 36.653Time 2023-05-29T19:02:36




Episode 40000 - Step 698771 - Epsilon 0.8397149652093182 - Mean Reward 661.57 - Mean Length 190.86 - Mean Loss 1.17 - Mean Q Value 29.814 - Time Delta 38.516Time 2023-05-29T19:03:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_700000.chkpt at step 700000
Episode 40000 - Step 703334 - Epsilon 0.8387576064002995 - Mean Reward 658.49 - Mean Length 197.56 - Mean Loss 1.186 - Mean Q Value 30.201 - Time Delta 47.226Time 2023-05-29T19:04:01




Episode 40000 - Step 706581 - Epsilon 0.838077021098068 - Mean Reward 654.41 - Mean Length 200.08 - Mean Loss 1.179 - Mean Q Value 30.636 - Time Delta 33.862Time 2023-05-29T19:04:35




MarioNet saved to ./checkpoints_pytorch/ mario_net_710000.chkpt at step 710000
Episode 40000 - Step 711421 - Epsilon 0.8370635610426981 - Mean Reward 644.88 - Mean Length 198.92 - Mean Loss 1.17 - Mean Q Value 31.109 - Time Delta 50.431Time 2023-05-29T19:05:26




Episode 40000 - Step 715345 - Epsilon 0.8362428042336457 - Mean Reward 653.72 - Mean Length 202.76 - Mean Loss 1.166 - Mean Q Value 31.475 - Time Delta 41.017Time 2023-05-29T19:06:07




Episode 40000 - Step 718552 - Epsilon 0.8355726151794224 - Mean Reward 645.76 - Mean Length 197.81 - Mean Loss 1.149 - Mean Q Value 31.701 - Time Delta 33.425Time 2023-05-29T19:06:40




MarioNet saved to ./checkpoints_pytorch/ mario_net_720000.chkpt at step 720000
Episode 40000 - Step 723118 - Epsilon 0.8346193530977495 - Mean Reward 653.3 - Mean Length 197.84 - Mean Loss 1.13 - Mean Q Value 31.776 - Time Delta 47.335Time 2023-05-29T19:07:27




Episode 40000 - Step 725566 - Epsilon 0.8341087222587278 - Mean Reward 659.88 - Mean Length 189.85 - Mean Loss 1.131 - Mean Q Value 31.817 - Time Delta 25.491Time 2023-05-29T19:07:53




Episode 40000 - Step 728750 - Epsilon 0.8334450358150028 - Mean Reward 642.13 - Mean Length 173.29 - Mean Loss 1.129 - Mean Q Value 31.891 - Time Delta 33.13Time 2023-05-29T19:08:26




MarioNet saved to ./checkpoints_pytorch/ mario_net_730000.chkpt at step 730000
Episode 40000 - Step 733568 - Epsilon 0.8324417554906902 - Mean Reward 669.86 - Mean Length 182.23 - Mean Loss 1.14 - Mean Q Value 31.832 - Time Delta 49.988Time 2023-05-29T19:09:16




Episode 40000 - Step 736817 - Epsilon 0.8317658791176635 - Mean Reward 670.47 - Mean Length 182.65 - Mean Loss 1.15 - Mean Q Value 31.998 - Time Delta 33.893Time 2023-05-29T19:09:50




MarioNet saved to ./checkpoints_pytorch/ mario_net_740000.chkpt at step 740000
Episode 40000 - Step 740175 - Epsilon 0.8310679045906004 - Mean Reward 635.11 - Mean Length 170.57 - Mean Loss 1.163 - Mean Q Value 32.269 - Time Delta 34.834Time 2023-05-29T19:10:25




Episode 40000 - Step 744043 - Epsilon 0.8302646502624036 - Mean Reward 674.28 - Mean Length 184.77 - Mean Loss 1.206 - Mean Q Value 32.65 - Time Delta 40.333Time 2023-05-29T19:11:05




MarioNet saved to ./checkpoints_pytorch/ mario_net_750000.chkpt at step 750000
Episode 40000 - Step 750701 - Epsilon 0.8288838240906308 - Mean Reward 672.06 - Mean Length 219.51 - Mean Loss 1.243 - Mean Q Value 32.831 - Time Delta 69.351Time 2023-05-29T19:12:14




Episode 40000 - Step 756227 - Epsilon 0.8277395115603239 - Mean Reward 675.44 - Mean Length 226.59 - Mean Loss 1.228 - Mean Q Value 32.829 - Time Delta 57.39Time 2023-05-29T19:13:12




MarioNet saved to ./checkpoints_pytorch/ mario_net_760000.chkpt at step 760000
Episode 40000 - Step 762082 - Epsilon 0.8265287940088495 - Mean Reward 696.23 - Mean Length 252.65 - Mean Loss 1.224 - Mean Q Value 32.403 - Time Delta 60.662Time 2023-05-29T19:14:12




Episode 40000 - Step 765309 - Epsilon 0.8258622607198904 - Mean Reward 688.24 - Mean Length 251.34 - Mean Loss 1.202 - Mean Q Value 31.833 - Time Delta 33.648Time 2023-05-29T19:14:46




Episode 40000 - Step 768495 - Epsilon 0.8252047232460058 - Mean Reward 665.61 - Mean Length 244.52 - Mean Loss 1.134 - Mean Q Value 30.993 - Time Delta 33.061Time 2023-05-29T19:15:19




MarioNet saved to ./checkpoints_pytorch/ mario_net_770000.chkpt at step 770000
Episode 40000 - Step 772447 - Epsilon 0.8243898235042224 - Mean Reward 679.22 - Mean Length 217.46 - Mean Loss 1.092 - Mean Q Value 30.214 - Time Delta 40.902Time 2023-05-29T19:16:00




Episode 40000 - Step 775782 - Epsilon 0.8237027748561065 - Mean Reward 666.53 - Mean Length 195.55 - Mean Loss 1.08 - Mean Q Value 29.811 - Time Delta 34.88Time 2023-05-29T19:16:35




MarioNet saved to ./checkpoints_pytorch/ mario_net_780000.chkpt at step 780000
Episode 40000 - Step 781385 - Epsilon 0.8225497807667027 - Mean Reward 679.48 - Mean Length 193.03 - Mean Loss 1.069 - Mean Q Value 29.618 - Time Delta 57.872Time 2023-05-29T19:17:33




Episode 40000 - Step 784671 - Epsilon 0.8218743335152158 - Mean Reward 699.52 - Mean Length 193.62 - Mean Loss 1.064 - Mean Q Value 29.372 - Time Delta 34.368Time 2023-05-29T19:18:07




Episode 40000 - Step 788923 - Epsilon 0.8210011451712462 - Mean Reward 703.8 - Mean Length 204.28 - Mean Loss 1.073 - Mean Q Value 29.445 - Time Delta 44.437Time 2023-05-29T19:18:52




MarioNet saved to ./checkpoints_pytorch/ mario_net_790000.chkpt at step 790000
Episode 40000 - Step 793558 - Epsilon 0.8200503609422928 - Mean Reward 680.25 - Mean Length 211.11 - Mean Loss 1.071 - Mean Q Value 29.606 - Time Delta 48.411Time 2023-05-29T19:19:40




Episode 40000 - Step 797473 - Epsilon 0.8192481292073257 - Mean Reward 655.36 - Mean Length 216.91 - Mean Loss 1.062 - Mean Q Value 29.729 - Time Delta 40.668Time 2023-05-29T19:20:21




MarioNet saved to ./checkpoints_pytorch/ mario_net_800000.chkpt at step 800000
Episode 40000 - Step 801203 - Epsilon 0.8184845363118028 - Mean Reward 639.52 - Mean Length 198.18 - Mean Loss 1.062 - Mean Q Value 30.048 - Time Delta 38.8Time 2023-05-29T19:21:00




Episode 40000 - Step 807123 - Epsilon 0.817274075008898 - Mean Reward 665.53 - Mean Length 224.52 - Mean Loss 1.078 - Mean Q Value 30.668 - Time Delta 61.081Time 2023-05-29T19:22:01




MarioNet saved to ./checkpoints_pytorch/ mario_net_810000.chkpt at step 810000
Episode 40000 - Step 812003 - Epsilon 0.8162776084808713 - Mean Reward 671.44 - Mean Length 230.8 - Mean Loss 1.093 - Mean Q Value 30.799 - Time Delta 50.74Time 2023-05-29T19:22:51




Episode 40000 - Step 816838 - Epsilon 0.8152915288801393 - Mean Reward 686.77 - Mean Length 232.8 - Mean Loss 1.071 - Mean Q Value 30.423 - Time Delta 49.951Time 2023-05-29T19:23:41




Episode 40000 - Step 819431 - Epsilon 0.8147631873476276 - Mean Reward 679.72 - Mean Length 219.58 - Mean Loss 1.064 - Mean Q Value 30.114 - Time Delta 27.009Time 2023-05-29T19:24:08




MarioNet saved to ./checkpoints_pytorch/ mario_net_820000.chkpt at step 820000
Episode 40000 - Step 823682 - Epsilon 0.8138977576111546 - Mean Reward 662.64 - Mean Length 224.79 - Mean Loss 1.048 - Mean Q Value 29.84 - Time Delta 44.276Time 2023-05-29T19:24:53




Episode 40000 - Step 827642 - Epsilon 0.8130923974493944 - Mean Reward 639.39 - Mean Length 205.19 - Mean Loss 1.029 - Mean Q Value 29.406 - Time Delta 41.034Time 2023-05-29T19:25:34




MarioNet saved to ./checkpoints_pytorch/ mario_net_830000.chkpt at step 830000
Episode 40000 - Step 831094 - Epsilon 0.8123910013185861 - Mean Reward 631.33 - Mean Length 190.91 - Mean Loss 1.022 - Mean Q Value 29.36 - Time Delta 35.839Time 2023-05-29T19:26:09




Episode 40000 - Step 835294 - Epsilon 0.8115384383344107 - Mean Reward 643.21 - Mean Length 184.56 - Mean Loss 1.041 - Mean Q Value 29.823 - Time Delta 43.577Time 2023-05-29T19:26:53




Episode 40000 - Step 839049 - Epsilon 0.8107769640030691 - Mean Reward 673.4 - Mean Length 196.18 - Mean Loss 1.049 - Mean Q Value 30.192 - Time Delta 38.602Time 2023-05-29T19:27:32




MarioNet saved to ./checkpoints_pytorch/ mario_net_840000.chkpt at step 840000
Episode 40000 - Step 843996 - Epsilon 0.8097748552741524 - Mean Reward 706.5 - Mean Length 203.14 - Mean Loss 1.062 - Mean Q Value 30.441 - Time Delta 51.43Time 2023-05-29T19:28:23




Episode 40000 - Step 849656 - Epsilon 0.8086298340042503 - Mean Reward 727.05 - Mean Length 220.14 - Mean Loss 1.071 - Mean Q Value 30.736 - Time Delta 58.764Time 2023-05-29T19:29:22




MarioNet saved to ./checkpoints_pytorch/ mario_net_850000.chkpt at step 850000
Episode 40000 - Step 853317 - Epsilon 0.8078900740403947 - Mean Reward 734.57 - Mean Length 222.23 - Mean Loss 1.09 - Mean Q Value 31.046 - Time Delta 38.245Time 2023-05-29T19:30:00




Episode 40000 - Step 856266 - Epsilon 0.8072946765141812 - Mean Reward 719.04 - Mean Length 209.72 - Mean Loss 1.115 - Mean Q Value 31.512 - Time Delta 30.844Time 2023-05-29T19:30:31




Episode 40000 - Step 859696 - Epsilon 0.8066027179621069 - Mean Reward 712.93 - Mean Length 206.47 - Mean Loss 1.119 - Mean Q Value 31.985 - Time Delta 35.877Time 2023-05-29T19:31:07




MarioNet saved to ./checkpoints_pytorch/ mario_net_860000.chkpt at step 860000
Episode 40000 - Step 863222 - Epsilon 0.8058920108674615 - Mean Reward 688.28 - Mean Length 192.26 - Mean Loss 1.135 - Mean Q Value 32.512 - Time Delta 36.395Time 2023-05-29T19:31:43




Episode 40000 - Step 866657 - Epsilon 0.8052002480847195 - Mean Reward 656.2 - Mean Length 170.01 - Mean Loss 1.165 - Mean Q Value 33.009 - Time Delta 35.646Time 2023-05-29T19:32:19




MarioNet saved to ./checkpoints_pytorch/ mario_net_870000.chkpt at step 870000
Episode 40000 - Step 870923 - Epsilon 0.8043419596763808 - Mean Reward 667.63 - Mean Length 176.06 - Mean Loss 1.178 - Mean Q Value 33.491 - Time Delta 44.271Time 2023-05-29T19:33:03




Episode 40000 - Step 875801 - Epsilon 0.8033616623916383 - Mean Reward 676.91 - Mean Length 195.35 - Mean Loss 1.184 - Mean Q Value 33.595 - Time Delta 50.277Time 2023-05-29T19:33:53




MarioNet saved to ./checkpoints_pytorch/ mario_net_880000.chkpt at step 880000
Episode 40000 - Step 880013 - Epsilon 0.8025161676864239 - Mean Reward 697.79 - Mean Length 203.17 - Mean Loss 1.224 - Mean Q Value 33.561 - Time Delta 44.036Time 2023-05-29T19:34:37




Episode 40000 - Step 883316 - Epsilon 0.8018537634058694 - Mean Reward 692.82 - Mean Length 200.94 - Mean Loss 1.238 - Mean Q Value 33.632 - Time Delta 34.268Time 2023-05-29T19:35:12




Episode 40000 - Step 886998 - Epsilon 0.8011159965336289 - Mean Reward 715.53 - Mean Length 203.41 - Mean Loss 1.242 - Mean Q Value 33.785 - Time Delta 38.489Time 2023-05-29T19:35:50




MarioNet saved to ./checkpoints_pytorch/ mario_net_890000.chkpt at step 890000
Episode 40000 - Step 890169 - Epsilon 0.8004811634131594 - Mean Reward 694.8 - Mean Length 192.46 - Mean Loss 1.236 - Mean Q Value 34.158 - Time Delta 33.216Time 2023-05-29T19:36:23




Episode 40000 - Step 894149 - Epsilon 0.799685080672784 - Mean Reward 712.57 - Mean Length 183.48 - Mean Loss 1.251 - Mean Q Value 34.764 - Time Delta 41.472Time 2023-05-29T19:37:05




Episode 40000 - Step 896863 - Epsilon 0.7991426783084766 - Mean Reward 650.84 - Mean Length 168.5 - Mean Loss 1.234 - Mean Q Value 35.381 - Time Delta 28.212Time 2023-05-29T19:37:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_900000.chkpt at step 900000
Episode 40000 - Step 900715 - Epsilon 0.7983734742442077 - Mean Reward 643.56 - Mean Length 173.99 - Mean Loss 1.238 - Mean Q Value 35.614 - Time Delta 40.009Time 2023-05-29T19:38:13




Episode 40000 - Step 905225 - Epsilon 0.797473815317443 - Mean Reward 653.28 - Mean Length 182.27 - Mean Loss 1.25 - Mean Q Value 35.594 - Time Delta 46.743Time 2023-05-29T19:39:00




Episode 40000 - Step 908958 - Epsilon 0.796729919960369 - Mean Reward 665.96 - Mean Length 187.89 - Mean Loss 1.253 - Mean Q Value 35.467 - Time Delta 38.749Time 2023-05-29T19:39:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_910000.chkpt at step 910000
Episode 40000 - Step 912838 - Epsilon 0.7959574665418181 - Mean Reward 640.42 - Mean Length 186.89 - Mean Loss 1.245 - Mean Q Value 35.163 - Time Delta 40.139Time 2023-05-29T19:40:19




Episode 40000 - Step 917929 - Epsilon 0.7949450559590359 - Mean Reward 692.3 - Mean Length 210.66 - Mean Loss 1.257 - Mean Q Value 35.081 - Time Delta 53.055Time 2023-05-29T19:41:12




MarioNet saved to ./checkpoints_pytorch/ mario_net_920000.chkpt at step 920000
Episode 40000 - Step 921809 - Epsilon 0.7941743330193642 - Mean Reward 724.94 - Mean Length 210.94 - Mean Loss 1.244 - Mean Q Value 34.972 - Time Delta 40.452Time 2023-05-29T19:41:52




Episode 40000 - Step 925976 - Epsilon 0.7933474325910058 - Mean Reward 715.31 - Mean Length 207.51 - Mean Loss 1.211 - Mean Q Value 34.926 - Time Delta 43.376Time 2023-05-29T19:42:36




Episode 40000 - Step 929389 - Epsilon 0.7926707975199605 - Mean Reward 696.64 - Mean Length 204.31 - Mean Loss 1.189 - Mean Q Value 34.732 - Time Delta 35.624Time 2023-05-29T19:43:11




MarioNet saved to ./checkpoints_pytorch/ mario_net_930000.chkpt at step 930000
Episode 40000 - Step 934228 - Epsilon 0.7917124437038767 - Mean Reward 706.44 - Mean Length 213.9 - Mean Loss 1.183 - Mean Q Value 34.424 - Time Delta 47.947Time 2023-05-29T19:43:59




Episode 40000 - Step 937165 - Epsilon 0.7911313421323889 - Mean Reward 677.66 - Mean Length 192.36 - Mean Loss 1.173 - Mean Q Value 33.895 - Time Delta 29.695Time 2023-05-29T19:44:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_940000.chkpt at step 940000
Episode 40000 - Step 942369 - Epsilon 0.7901027493722279 - Mean Reward 680.59 - Mean Length 205.6 - Mean Loss 1.161 - Mean Q Value 33.555 - Time Delta 53.512Time 2023-05-29T19:45:22




Episode 40000 - Step 947346 - Epsilon 0.7891202752518386 - Mean Reward 673.26 - Mean Length 213.7 - Mean Loss 1.166 - Mean Q Value 33.262 - Time Delta 51.459Time 2023-05-29T19:46:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_950000.chkpt at step 950000
Episode 40000 - Step 950403 - Epsilon 0.7885174204012877 - Mean Reward 671.77 - Mean Length 210.14 - Mean Loss 1.154 - Mean Q Value 32.817 - Time Delta 32.027Time 2023-05-29T19:46:46




Episode 40000 - Step 956791 - Epsilon 0.7872591629094786 - Mean Reward 667.86 - Mean Length 225.63 - Mean Loss 1.124 - Mean Q Value 32.329 - Time Delta 66.207Time 2023-05-29T19:47:52




MarioNet saved to ./checkpoints_pytorch/ mario_net_960000.chkpt at step 960000
Episode 40000 - Step 960032 - Epsilon 0.7866215444430039 - Mean Reward 662.24 - Mean Length 228.67 - Mean Loss 1.11 - Mean Q Value 31.866 - Time Delta 33.947Time 2023-05-29T19:48:26




Episode 40000 - Step 963511 - Epsilon 0.7859376777088013 - Mean Reward 641.49 - Mean Length 211.42 - Mean Loss 1.115 - Mean Q Value 31.548 - Time Delta 36.201Time 2023-05-29T19:49:02




Episode 40000 - Step 967818 - Epsilon 0.7850918746494197 - Mean Reward 646.41 - Mean Length 204.72 - Mean Loss 1.094 - Mean Q Value 31.216 - Time Delta 44.604Time 2023-05-29T19:49:47




MarioNet saved to ./checkpoints_pytorch/ mario_net_970000.chkpt at step 970000
Episode 40000 - Step 971405 - Epsilon 0.7843881589984288 - Mean Reward 652.44 - Mean Length 210.02 - Mean Loss 1.116 - Mean Q Value 30.987 - Time Delta 37.332Time 2023-05-29T19:50:24




Episode 40000 - Step 974269 - Epsilon 0.7838267380190234 - Mean Reward 644.7 - Mean Length 174.78 - Mean Loss 1.128 - Mean Q Value 31.108 - Time Delta 29.952Time 2023-05-29T19:50:54




Episode 40000 - Step 977732 - Epsilon 0.7831484335990933 - Mean Reward 673.07 - Mean Length 177.0 - Mean Loss 1.123 - Mean Q Value 31.456 - Time Delta 36.025Time 2023-05-29T19:51:30




MarioNet saved to ./checkpoints_pytorch/ mario_net_980000.chkpt at step 980000
Episode 40000 - Step 982125 - Epsilon 0.782288812849968 - Mean Reward 673.5 - Mean Length 186.14 - Mean Loss 1.094 - Mean Q Value 31.635 - Time Delta 45.508Time 2023-05-29T19:52:16




Episode 40000 - Step 986635 - Epsilon 0.7814072791611721 - Mean Reward 709.27 - Mean Length 188.17 - Mean Loss 1.094 - Mean Q Value 31.948 - Time Delta 46.79Time 2023-05-29T19:53:02




Episode 40000 - Step 989598 - Epsilon 0.780828665977042 - Mean Reward 701.84 - Mean Length 181.93 - Mean Loss 1.094 - Mean Q Value 32.641 - Time Delta 30.613Time 2023-05-29T19:53:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_990000.chkpt at step 990000
Episode 40000 - Step 994352 - Epsilon 0.7799012022459578 - Mean Reward 716.04 - Mean Length 200.83 - Mean Loss 1.116 - Mean Q Value 33.094 - Time Delta 49.572Time 2023-05-29T19:54:23




Episode 40000 - Step 998993 - Epsilon 0.778996846503706 - Mean Reward 730.16 - Mean Length 212.61 - Mean Loss 1.15 - Mean Q Value 33.623 - Time Delta 48.387Time 2023-05-29T19:55:11




MarioNet saved to ./checkpoints_pytorch/ mario_net_1000000.chkpt at step 1000000
Episode 40000 - Step 1002181 - Epsilon 0.7783762332865953 - Mean Reward 719.71 - Mean Length 200.56 - Mean Loss 1.216 - Mean Q Value 34.182 - Time Delta 33.208Time 2023-05-29T19:55:44




Episode 40000 - Step 1005954 - Epsilon 0.7776423759725793 - Mean Reward 684.48 - Mean Length 193.19 - Mean Loss 1.243 - Mean Q Value 34.685 - Time Delta 39.274Time 2023-05-29T19:56:24




Episode 40000 - Step 1008227 - Epsilon 0.777200606166883 - Mean Reward 671.55 - Mean Length 186.29 - Mean Loss 1.274 - Mean Q Value 35.027 - Time Delta 23.798Time 2023-05-29T19:56:47




MarioNet saved to ./checkpoints_pytorch/ mario_net_1010000.chkpt at step 1010000
Episode 40000 - Step 1010763 - Epsilon 0.7767080170881674 - Mean Reward 641.09 - Mean Length 164.11 - Mean Loss 1.273 - Mean Q Value 35.453 - Time Delta 26.327Time 2023-05-29T19:57:14




Episode 40000 - Step 1015903 - Epsilon 0.7757105881467304 - Mean Reward 631.25 - Mean Length 169.1 - Mean Loss 1.279 - Mean Q Value 35.514 - Time Delta 53.019Time 2023-05-29T19:58:07




MarioNet saved to ./checkpoints_pytorch/ mario_net_1020000.chkpt at step 1020000
Episode 40000 - Step 1020354 - Epsilon 0.7748478961510882 - Mean Reward 653.93 - Mean Length 181.73 - Mean Loss 1.245 - Mean Q Value 35.415 - Time Delta 46.486Time 2023-05-29T19:58:53




Episode 40000 - Step 1024382 - Epsilon 0.7740680169571476 - Mean Reward 667.21 - Mean Length 184.28 - Mean Loss 1.264 - Mean Q Value 35.317 - Time Delta 42.137Time 2023-05-29T19:59:35




Episode 40000 - Step 1027740 - Epsilon 0.7734184594655324 - Mean Reward 694.14 - Mean Length 195.13 - Mean Loss 1.252 - Mean Q Value 35.074 - Time Delta 35.191Time 2023-05-29T20:00:10




MarioNet saved to ./checkpoints_pytorch/ mario_net_1030000.chkpt at step 1030000
Episode 40000 - Step 1031394 - Epsilon 0.7727122642182245 - Mean Reward 722.89 - Mean Length 206.31 - Mean Loss 1.253 - Mean Q Value 34.75 - Time Delta 38.1Time 2023-05-29T20:00:49




Episode 40000 - Step 1034640 - Epsilon 0.772085462496571 - Mean Reward 705.52 - Mean Length 187.37 - Mean Loss 1.241 - Mean Q Value 34.613 - Time Delta 33.904Time 2023-05-29T20:01:22




Episode 40000 - Step 1037871 - Epsilon 0.7714620621959186 - Mean Reward 683.52 - Mean Length 175.17 - Mean Loss 1.23 - Mean Q Value 34.902 - Time Delta 33.725Time 2023-05-29T20:01:56




MarioNet saved to ./checkpoints_pytorch/ mario_net_1040000.chkpt at step 1040000
Episode 40000 - Step 1042136 - Epsilon 0.7706399290468442 - Mean Reward 697.53 - Mean Length 177.54 - Mean Loss 1.215 - Mean Q Value 35.186 - Time Delta 44.466Time 2023-05-29T20:02:41




Episode 40000 - Step 1046297 - Epsilon 0.7698386875783585 - Mean Reward 718.53 - Mean Length 185.57 - Mean Loss 1.219 - Mean Q Value 35.506 - Time Delta 43.069Time 2023-05-29T20:03:24




MarioNet saved to ./checkpoints_pytorch/ mario_net_1050000.chkpt at step 1050000
Episode 40000 - Step 1050625 - Epsilon 0.7690061724861043 - Mean Reward 713.32 - Mean Length 192.31 - Mean Loss 1.214 - Mean Q Value 35.686 - Time Delta 44.872Time 2023-05-29T20:04:09




Episode 40000 - Step 1053961 - Epsilon 0.7683650886271546 - Mean Reward 723.69 - Mean Length 193.21 - Mean Loss 1.216 - Mean Q Value 35.823 - Time Delta 34.765Time 2023-05-29T20:04:43




Episode 40000 - Step 1057284 - Epsilon 0.767727034318435 - Mean Reward 720.35 - Mean Length 194.13 - Mean Loss 1.231 - Mean Q Value 35.897 - Time Delta 34.629Time 2023-05-29T20:05:18




MarioNet saved to ./checkpoints_pytorch/ mario_net_1060000.chkpt at step 1060000
Episode 40000 - Step 1061394 - Epsilon 0.766938599819593 - Mean Reward 687.28 - Mean Length 192.58 - Mean Loss 1.237 - Mean Q Value 35.99 - Time Delta 42.864Time 2023-05-29T20:06:01




Episode 40000 - Step 1065899 - Epsilon 0.7660753213384625 - Mean Reward 671.66 - Mean Length 196.02 - Mean Loss 1.235 - Mean Q Value 35.988 - Time Delta 47.015Time 2023-05-29T20:06:48




Episode 40000 - Step 1068704 - Epsilon 0.7655382992675418 - Mean Reward 654.82 - Mean Length 180.79 - Mean Loss 1.222 - Mean Q Value 36.0 - Time Delta 29.133Time 2023-05-29T20:07:17




MarioNet saved to ./checkpoints_pytorch/ mario_net_1070000.chkpt at step 1070000
Episode 40000 - Step 1071823 - Epsilon 0.764941603371063 - Mean Reward 644.69 - Mean Length 178.62 - Mean Loss 1.212 - Mean Q Value 36.007 - Time Delta 32.406Time 2023-05-29T20:07:49




Episode 40000 - Step 1075197 - Epsilon 0.7642966470964593 - Mean Reward 657.09 - Mean Length 179.13 - Mean Loss 1.192 - Mean Q Value 35.96 - Time Delta 35.138Time 2023-05-29T20:08:25




Episode 40000 - Step 1077785 - Epsilon 0.7638023070401725 - Mean Reward 634.99 - Mean Length 163.91 - Mean Loss 1.157 - Mean Q Value 35.855 - Time Delta 27.058Time 2023-05-29T20:08:52




MarioNet saved to ./checkpoints_pytorch/ mario_net_1080000.chkpt at step 1080000
Episode 40000 - Step 1080428 - Epsilon 0.7632977913004044 - Mean Reward 618.82 - Mean Length 145.29 - Mean Loss 1.125 - Mean Q Value 35.914 - Time Delta 27.424Time 2023-05-29T20:09:19




Episode 40000 - Step 1085048 - Epsilon 0.7624166911744642 - Mean Reward 637.3 - Mean Length 163.44 - Mean Loss 1.125 - Mean Q Value 36.115 - Time Delta 48.034Time 2023-05-29T20:10:07




Episode 40000 - Step 1089570 - Epsilon 0.7615552660096145 - Mean Reward 681.3 - Mean Length 177.47 - Mean Loss 1.107 - Mean Q Value 36.378 - Time Delta 47.086Time 2023-05-29T20:10:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_1090000.chkpt at step 1090000
Episode 40000 - Step 1094103 - Epsilon 0.76069272222765 - Mean Reward 714.96 - Mean Length 189.06 - Mean Loss 1.119 - Mean Q Value 36.507 - Time Delta 47.128Time 2023-05-29T20:11:41




Episode 40000 - Step 1097760 - Epsilon 0.7599975766362143 - Mean Reward 740.38 - Mean Length 199.75 - Mean Loss 1.126 - Mean Q Value 36.598 - Time Delta 38.266Time 2023-05-29T20:12:20




MarioNet saved to ./checkpoints_pytorch/ mario_net_1100000.chkpt at step 1100000
Episode 40000 - Step 1101985 - Epsilon 0.7591952528973049 - Mean Reward 752.85 - Mean Length 215.57 - Mean Loss 1.152 - Mean Q Value 36.661 - Time Delta 44.118Time 2023-05-29T20:13:04




Episode 40000 - Step 1105230 - Epsilon 0.7585796054271204 - Mean Reward 759.64 - Mean Length 201.82 - Mean Loss 1.152 - Mean Q Value 36.716 - Time Delta 33.762Time 2023-05-29T20:13:37




Episode 40000 - Step 1109300 - Epsilon 0.7578081431300621 - Mean Reward 725.67 - Mean Length 197.3 - Mean Loss 1.181 - Mean Q Value 36.85 - Time Delta 42.309Time 2023-05-29T20:14:20




MarioNet saved to ./checkpoints_pytorch/ mario_net_1110000.chkpt at step 1110000
Episode 40000 - Step 1112132 - Epsilon 0.7572718047837891 - Mean Reward 680.15 - Mean Length 180.29 - Mean Loss 1.217 - Mean Q Value 37.028 - Time Delta 29.446Time 2023-05-29T20:14:49




Episode 40000 - Step 1117164 - Epsilon 0.7563197556986643 - Mean Reward 718.52 - Mean Length 194.04 - Mean Loss 1.233 - Mean Q Value 37.142 - Time Delta 52.287Time 2023-05-29T20:15:41




MarioNet saved to ./checkpoints_pytorch/ mario_net_1120000.chkpt at step 1120000
Episode 40000 - Step 1121505 - Epsilon 0.755499404804551 - Mean Reward 734.56 - Mean Length 195.2 - Mean Loss 1.223 - Mean Q Value 37.033 - Time Delta 45.197Time 2023-05-29T20:16:27




Episode 40000 - Step 1126748 - Epsilon 0.7545097825512327 - Mean Reward 723.89 - Mean Length 215.18 - Mean Loss 1.217 - Mean Q Value 36.716 - Time Delta 54.284Time 2023-05-29T20:17:21




MarioNet saved to ./checkpoints_pytorch/ mario_net_1130000.chkpt at step 1130000
Episode 40000 - Step 1130497 - Epsilon 0.7538029494604815 - Mean Reward 732.85 - Mean Length 211.97 - Mean Loss 1.201 - Mean Q Value 36.177 - Time Delta 39.172Time 2023-05-29T20:18:00




Episode 40000 - Step 1133425 - Epsilon 0.7532513675360637 - Mean Reward 729.48 - Mean Length 212.93 - Mean Loss 1.172 - Mean Q Value 35.699 - Time Delta 30.638Time 2023-05-29T20:18:31




Episode 40000 - Step 1138319 - Epsilon 0.7523303279334932 - Mean Reward 713.53 - Mean Length 211.55 - Mean Loss 1.156 - Mean Q Value 35.311 - Time Delta 48.239Time 2023-05-29T20:19:19




MarioNet saved to ./checkpoints_pytorch/ mario_net_1140000.chkpt at step 1140000
Episode 40000 - Step 1143367 - Epsilon 0.7513814857862262 - Mean Reward 739.32 - Mean Length 218.62 - Mean Loss 1.149 - Mean Q Value 35.041 - Time Delta 52.443Time 2023-05-29T20:20:11




Episode 40000 - Step 1146453 - Epsilon 0.7508020184556926 - Mean Reward 737.51 - Mean Length 197.05 - Mean Loss 1.155 - Mean Q Value 34.954 - Time Delta 32.243Time 2023-05-29T20:20:44




MarioNet saved to ./checkpoints_pytorch/ mario_net_1150000.chkpt at step 1150000
Episode 40000 - Step 1150148 - Epsilon 0.7501087852407794 - Mean Reward 755.25 - Mean Length 196.51 - Mean Loss 1.155 - Mean Q Value 35.03 - Time Delta 38.32Time 2023-05-29T20:21:22




Episode 40000 - Step 1153877 - Epsilon 0.7494098220930926 - Mean Reward 787.46 - Mean Length 204.52 - Mean Loss 1.163 - Mean Q Value 35.136 - Time Delta 38.807Time 2023-05-29T20:22:01




Episode 40000 - Step 1158029 - Epsilon 0.7486323381844223 - Mean Reward 797.57 - Mean Length 197.1 - Mean Loss 1.169 - Mean Q Value 35.388 - Time Delta 43.427Time 2023-05-29T20:22:44




MarioNet saved to ./checkpoints_pytorch/ mario_net_1160000.chkpt at step 1160000
Episode 40000 - Step 1161663 - Epsilon 0.7479525144767862 - Mean Reward 769.89 - Mean Length 182.96 - Mean Loss 1.201 - Mean Q Value 35.777 - Time Delta 37.813Time 2023-05-29T20:23:22




Episode 40000 - Step 1164679 - Epsilon 0.7473887707684101 - Mean Reward 763.55 - Mean Length 182.26 - Mean Loss 1.245 - Mean Q Value 36.482 - Time Delta 31.367Time 2023-05-29T20:23:53




Episode 40000 - Step 1168221 - Epsilon 0.7467272508602787 - Mean Reward 748.96 - Mean Length 180.73 - Mean Loss 1.289 - Mean Q Value 37.319 - Time Delta 37.066Time 2023-05-29T20:24:30




MarioNet saved to ./checkpoints_pytorch/ mario_net_1170000.chkpt at step 1170000
Episode 40000 - Step 1171459 - Epsilon 0.7461230196706635 - Mean Reward 728.78 - Mean Length 175.82 - Mean Loss 1.309 - Mean Q Value 38.012 - Time Delta 33.968Time 2023-05-29T20:25:04




Episode 40000 - Step 1174342 - Epsilon 0.7455854451886367 - Mean Reward 682.26 - Mean Length 163.13 - Mean Loss 1.341 - Mean Q Value 38.704 - Time Delta 29.934Time 2023-05-29T20:25:34




Episode 40000 - Step 1177399 - Epsilon 0.745015849125491 - Mean Reward 657.44 - Mean Length 157.36 - Mean Loss 1.34 - Mean Q Value 39.431 - Time Delta 31.975Time 2023-05-29T20:26:06




MarioNet saved to ./checkpoints_pytorch/ mario_net_1180000.chkpt at step 1180000
Episode 40000 - Step 1181614 - Epsilon 0.7442312020598068 - Mean Reward 680.45 - Mean Length 169.35 - Mean Loss 1.347 - Mean Q Value 39.876 - Time Delta 43.882Time 2023-05-29T20:26:50




Episode 40000 - Step 1185595 - Epsilon 0.7434908743300072 - Mean Reward 695.07 - Mean Length 173.74 - Mean Loss 1.335 - Mean Q Value 40.154 - Time Delta 41.085Time 2023-05-29T20:27:31




Episode 40000 - Step 1189435 - Epsilon 0.742777465492405 - Mean Reward 693.9 - Mean Length 179.76 - Mean Loss 1.327 - Mean Q Value 40.358 - Time Delta 40.007Time 2023-05-29T20:28:11




MarioNet saved to ./checkpoints_pytorch/ mario_net_1190000.chkpt at step 1190000
Episode 40000 - Step 1191799 - Epsilon 0.7423386136488744 - Mean Reward 677.51 - Mean Length 174.57 - Mean Loss 1.341 - Mean Q Value 40.486 - Time Delta 24.682Time 2023-05-29T20:28:36




Episode 40000 - Step 1195434 - Epsilon 0.741664319778021 - Mean Reward 686.39 - Mean Length 180.35 - Mean Loss 1.33 - Mean Q Value 40.404 - Time Delta 37.807Time 2023-05-29T20:29:14




Episode 40000 - Step 1199525 - Epsilon 0.7409061702648888 - Mean Reward 693.04 - Mean Length 179.11 - Mean Loss 1.289 - Mean Q Value 40.306 - Time Delta 42.538Time 2023-05-29T20:29:56




MarioNet saved to ./checkpoints_pytorch/ mario_net_1200000.chkpt at step 1200000
Episode 40000 - Step 1202881 - Epsilon 0.7402848106071044 - Mean Reward 654.6 - Mean Length 172.86 - Mean Loss 1.289 - Mean Q Value 40.105 - Time Delta 35.095Time 2023-05-29T20:30:31




Episode 40000 - Step 1206359 - Epsilon 0.7396414126415659 - Mean Reward 668.06 - Mean Length 169.24 - Mean Loss 1.287 - Mean Q Value 39.898 - Time Delta 36.415Time 2023-05-29T20:31:08




Episode 40000 - Step 1209763 - Epsilon 0.7390122454692761 - Mean Reward 693.13 - Mean Length 179.64 - Mean Loss 1.248 - Mean Q Value 39.643 - Time Delta 35.256Time 2023-05-29T20:31:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_1210000.chkpt at step 1210000
Episode 40000 - Step 1213068 - Epsilon 0.738401888713437 - Mean Reward 690.36 - Mean Length 176.34 - Mean Loss 1.263 - Mean Q Value 39.292 - Time Delta 34.561Time 2023-05-29T20:32:18




Episode 40000 - Step 1217313 - Epsilon 0.7376186752771674 - Mean Reward 685.78 - Mean Length 177.88 - Mean Loss 1.285 - Mean Q Value 39.002 - Time Delta 44.089Time 2023-05-29T20:33:02




MarioNet saved to ./checkpoints_pytorch/ mario_net_1220000.chkpt at step 1220000
Episode 40000 - Step 1220323 - Epsilon 0.7370638259428278 - Mean Reward 698.76 - Mean Length 174.42 - Mean Loss 1.289 - Mean Q Value 38.766 - Time Delta 31.02Time 2023-05-29T20:33:33




Episode 40000 - Step 1224166 - Epsilon 0.7363560318445126 - Mean Reward 705.34 - Mean Length 178.07 - Mean Loss 1.305 - Mean Q Value 38.751 - Time Delta 40.032Time 2023-05-29T20:34:13




Episode 40000 - Step 1228210 - Epsilon 0.7356119519989438 - Mean Reward 715.82 - Mean Length 184.47 - Mean Loss 1.342 - Mean Q Value 38.603 - Time Delta 42.218Time 2023-05-29T20:34:55




MarioNet saved to ./checkpoints_pytorch/ mario_net_1230000.chkpt at step 1230000
Episode 40000 - Step 1231881 - Epsilon 0.7349371537409695 - Mean Reward 727.39 - Mean Length 188.13 - Mean Loss 1.342 - Mean Q Value 38.697 - Time Delta 38.256Time 2023-05-29T20:35:33




Episode 40000 - Step 1235316 - Epsilon 0.7343062972940059 - Mean Reward 696.5 - Mean Length 180.03 - Mean Loss 1.349 - Mean Q Value 38.711 - Time Delta 35.849Time 2023-05-29T20:36:09




Episode 40000 - Step 1238179 - Epsilon 0.7337809055429728 - Mean Reward 683.53 - Mean Length 178.56 - Mean Loss 1.335 - Mean Q Value 38.71 - Time Delta 29.886Time 2023-05-29T20:36:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_1240000.chkpt at step 1240000
Episode 40000 - Step 1242466 - Epsilon 0.7329948970366442 - Mean Reward 662.24 - Mean Length 183.0 - Mean Loss 1.327 - Mean Q Value 38.633 - Time Delta 44.771Time 2023-05-29T20:37:24




Episode 40000 - Step 1246233 - Epsilon 0.732304923948089 - Mean Reward 667.74 - Mean Length 180.23 - Mean Loss 1.31 - Mean Q Value 38.633 - Time Delta 39.363Time 2023-05-29T20:38:03




Episode 40000 - Step 1249599 - Epsilon 0.7316889484855247 - Mean Reward 662.55 - Mean Length 177.18 - Mean Loss 1.29 - Mean Q Value 38.508 - Time Delta 35.183Time 2023-05-29T20:38:38




MarioNet saved to ./checkpoints_pytorch/ mario_net_1250000.chkpt at step 1250000
Episode 40000 - Step 1255283 - Epsilon 0.7306499567381757 - Mean Reward 712.8 - Mean Length 199.67 - Mean Loss 1.272 - Mean Q Value 38.295 - Time Delta 58.659Time 2023-05-29T20:39:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_1260000.chkpt at step 1260000
Episode 40000 - Step 1260102 - Epsilon 0.7297702361208153 - Mean Reward 734.83 - Mean Length 219.23 - Mean Loss 1.261 - Mean Q Value 38.0 - Time Delta 50.332Time 2023-05-29T20:40:27




Episode 40000 - Step 1264331 - Epsilon 0.7289990941589253 - Mean Reward 768.5 - Mean Length 218.65 - Mean Loss 1.237 - Mean Q Value 37.398 - Time Delta 44.235Time 2023-05-29T20:41:12




Episode 40000 - Step 1267865 - Epsilon 0.7283553078131665 - Mean Reward 767.43 - Mean Length 216.32 - Mean Loss 1.208 - Mean Q Value 36.877 - Time Delta 37.024Time 2023-05-29T20:41:49




MarioNet saved to ./checkpoints_pytorch/ mario_net_1270000.chkpt at step 1270000
Episode 40000 - Step 1272704 - Epsilon 0.727474712626751 - Mean Reward 777.56 - Mean Length 231.05 - Mean Loss 1.19 - Mean Q Value 36.466 - Time Delta 50.515Time 2023-05-29T20:42:39




Episode 40000 - Step 1275854 - Epsilon 0.7269020517337167 - Mean Reward 722.51 - Mean Length 205.71 - Mean Loss 1.15 - Mean Q Value 35.964 - Time Delta 32.981Time 2023-05-29T20:43:12




MarioNet saved to ./checkpoints_pytorch/ mario_net_1280000.chkpt at step 1280000
Episode 40000 - Step 1280224 - Epsilon 0.7261083447842412 - Mean Reward 743.83 - Mean Length 201.22 - Mean Loss 1.137 - Mean Q Value 35.585 - Time Delta 45.44Time 2023-05-29T20:43:58




Episode 40000 - Step 1284031 - Epsilon 0.7254175998410447 - Mean Reward 724.36 - Mean Length 197.0 - Mean Loss 1.148 - Mean Q Value 35.516 - Time Delta 38.995Time 2023-05-29T20:44:37




Episode 40000 - Step 1289095 - Epsilon 0.7244998021333238 - Mean Reward 737.55 - Mean Length 212.3 - Mean Loss 1.168 - Mean Q Value 35.415 - Time Delta 52.254Time 2023-05-29T20:45:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_1290000.chkpt at step 1290000
Episode 40000 - Step 1293064 - Epsilon 0.7237812736536374 - Mean Reward 724.1 - Mean Length 203.6 - Mean Loss 1.192 - Mean Q Value 35.146 - Time Delta 41.313Time 2023-05-29T20:46:10




Episode 40000 - Step 1296316 - Epsilon 0.7231930785382623 - Mean Reward 712.68 - Mean Length 204.62 - Mean Loss 1.234 - Mean Q Value 35.188 - Time Delta 34.024Time 2023-05-29T20:46:44




MarioNet saved to ./checkpoints_pytorch/ mario_net_1300000.chkpt at step 1300000
Episode 40000 - Step 1300583 - Epsilon 0.7224220235598129 - Mean Reward 684.98 - Mean Length 203.59 - Mean Loss 1.227 - Mean Q Value 35.042 - Time Delta 44.175Time 2023-05-29T20:47:28




Episode 40000 - Step 1304386 - Epsilon 0.7217355071394603 - Mean Reward 672.51 - Mean Length 203.55 - Mean Loss 1.226 - Mean Q Value 34.869 - Time Delta 39.628Time 2023-05-29T20:48:08




Episode 40000 - Step 1308930 - Epsilon 0.7209160810229587 - Mean Reward 661.13 - Mean Length 198.35 - Mean Loss 1.198 - Mean Q Value 34.651 - Time Delta 47.246Time 2023-05-29T20:48:55




MarioNet saved to ./checkpoints_pytorch/ mario_net_1310000.chkpt at step 1310000
Episode 40000 - Step 1313383 - Epsilon 0.7201139676545762 - Mean Reward 679.36 - Mean Length 203.19 - Mean Loss 1.18 - Mean Q Value 34.492 - Time Delta 46.342Time 2023-05-29T20:49:42




Episode 40000 - Step 1317453 - Epsilon 0.7193816242437346 - Mean Reward 714.07 - Mean Length 211.37 - Mean Loss 1.164 - Mean Q Value 34.28 - Time Delta 42.277Time 2023-05-29T20:50:24




MarioNet saved to ./checkpoints_pytorch/ mario_net_1320000.chkpt at step 1320000
Episode 40000 - Step 1321562 - Epsilon 0.7186430188089685 - Mean Reward 723.96 - Mean Length 209.79 - Mean Loss 1.175 - Mean Q Value 34.216 - Time Delta 42.718Time 2023-05-29T20:51:07




Episode 40000 - Step 1324526 - Epsilon 0.7181107015133319 - Mean Reward 711.57 - Mean Length 201.4 - Mean Loss 1.144 - Mean Q Value 34.111 - Time Delta 30.256Time 2023-05-29T20:51:37




Episode 40000 - Step 1328612 - Epsilon 0.7173775258731967 - Mean Reward 709.87 - Mean Length 196.82 - Mean Loss 1.151 - Mean Q Value 34.152 - Time Delta 40.385Time 2023-05-29T20:52:17




MarioNet saved to ./checkpoints_pytorch/ mario_net_1330000.chkpt at step 1330000
Episode 40000 - Step 1332355 - Epsilon 0.7167065537494405 - Mean Reward 699.97 - Mean Length 189.72 - Mean Loss 1.143 - Mean Q Value 34.371 - Time Delta 39.374Time 2023-05-29T20:52:57




Episode 40000 - Step 1336028 - Epsilon 0.716048739939127 - Mean Reward 706.54 - Mean Length 185.75 - Mean Loss 1.142 - Mean Q Value 34.571 - Time Delta 38.086Time 2023-05-29T20:53:35




MarioNet saved to ./checkpoints_pytorch/ mario_net_1340000.chkpt at step 1340000
Episode 40000 - Step 1340080 - Epsilon 0.7153237497443778 - Mean Reward 703.35 - Mean Length 185.18 - Mean Loss 1.131 - Mean Q Value 34.847 - Time Delta 42.324Time 2023-05-29T20:54:17




Episode 40000 - Step 1343859 - Epsilon 0.7146482666788317 - Mean Reward 755.4 - Mean Length 193.33 - Mean Loss 1.134 - Mean Q Value 35.381 - Time Delta 39.403Time 2023-05-29T20:54:56




Episode 40000 - Step 1347411 - Epsilon 0.7140139406210976 - Mean Reward 755.65 - Mean Length 187.99 - Mean Loss 1.131 - Mean Q Value 35.998 - Time Delta 36.988Time 2023-05-29T20:55:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_1350000.chkpt at step 1350000
Episode 40000 - Step 1350947 - Epsilon 0.7133830311217215 - Mean Reward 757.65 - Mean Length 185.92 - Mean Loss 1.124 - Mean Q Value 36.637 - Time Delta 36.837Time 2023-05-29T20:56:10




Episode 40000 - Step 1354633 - Epsilon 0.7127259513723057 - Mean Reward 749.48 - Mean Length 186.05 - Mean Loss 1.12 - Mean Q Value 37.29 - Time Delta 38.354Time 2023-05-29T20:56:49




Episode 40000 - Step 1358185 - Epsilon 0.7120933315730851 - Mean Reward 761.37 - Mean Length 181.05 - Mean Loss 1.137 - Mean Q Value 38.009 - Time Delta 36.806Time 2023-05-29T20:57:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_1360000.chkpt at step 1360000
Episode 40000 - Step 1363290 - Epsilon 0.7111851020322644 - Mean Reward 721.4 - Mean Length 194.31 - Mean Loss 1.159 - Mean Q Value 38.609 - Time Delta 53.177Time 2023-05-29T20:58:19




Episode 40000 - Step 1368241 - Epsilon 0.7103053771142083 - Mean Reward 731.78 - Mean Length 208.3 - Mean Loss 1.185 - Mean Q Value 38.789 - Time Delta 51.492Time 2023-05-29T20:59:10




MarioNet saved to ./checkpoints_pytorch/ mario_net_1370000.chkpt at step 1370000
Episode 40000 - Step 1372042 - Epsilon 0.7096307299376874 - Mean Reward 722.32 - Mean Length 210.95 - Mean Loss 1.211 - Mean Q Value 38.687 - Time Delta 39.299Time 2023-05-29T20:59:49




Episode 40000 - Step 1376030 - Epsilon 0.708923580583975 - Mean Reward 724.59 - Mean Length 213.97 - Mean Loss 1.216 - Mean Q Value 38.498 - Time Delta 41.55Time 2023-05-29T21:00:31




Episode 40000 - Step 1379733 - Epsilon 0.7082675981818589 - Mean Reward 701.22 - Mean Length 215.48 - Mean Loss 1.2 - Mean Q Value 38.059 - Time Delta 38.641Time 2023-05-29T21:01:09




MarioNet saved to ./checkpoints_pytorch/ mario_net_1380000.chkpt at step 1380000
Episode 40000 - Step 1383783 - Epsilon 0.7075508400690317 - Mean Reward 727.89 - Mean Length 204.93 - Mean Loss 1.176 - Mean Q Value 37.407 - Time Delta 42.292Time 2023-05-29T21:01:52




Episode 40000 - Step 1387573 - Epsilon 0.7068807530680623 - Mean Reward 722.54 - Mean Length 193.32 - Mean Loss 1.128 - Mean Q Value 36.938 - Time Delta 39.422Time 2023-05-29T21:02:31




MarioNet saved to ./checkpoints_pytorch/ mario_net_1390000.chkpt at step 1390000
Episode 40000 - Step 1390720 - Epsilon 0.7063248332796414 - Mean Reward 719.64 - Mean Length 186.78 - Mean Loss 1.117 - Mean Q Value 36.574 - Time Delta 32.873Time 2023-05-29T21:03:04




Episode 40000 - Step 1394497 - Epsilon 0.7056582007558068 - Mean Reward 731.38 - Mean Length 184.67 - Mean Loss 1.123 - Mean Q Value 36.384 - Time Delta 38.803Time 2023-05-29T21:03:43




Episode 40000 - Step 1399389 - Epsilon 0.7047957081900577 - Mean Reward 740.51 - Mean Length 196.56 - Mean Loss 1.134 - Mean Q Value 36.031 - Time Delta 51.065Time 2023-05-29T21:04:34




MarioNet saved to ./checkpoints_pytorch/ mario_net_1400000.chkpt at step 1400000
Episode 40000 - Step 1403498 - Epsilon 0.7040720784462253 - Mean Reward 739.86 - Mean Length 197.15 - Mean Loss 1.124 - Mean Q Value 35.829 - Time Delta 42.879Time 2023-05-29T21:05:17




Episode 40000 - Step 1409790 - Epsilon 0.7029654435248873 - Mean Reward 765.08 - Mean Length 222.17 - Mean Loss 1.143 - Mean Q Value 35.843 - Time Delta 65.554Time 2023-05-29T21:06:22




MarioNet saved to ./checkpoints_pytorch/ mario_net_1410000.chkpt at step 1410000
Episode 40000 - Step 1413464 - Epsilon 0.7023200661191942 - Mean Reward 777.83 - Mean Length 227.44 - Mean Loss 1.164 - Mean Q Value 35.807 - Time Delta 38.401Time 2023-05-29T21:07:01




Episode 40000 - Step 1416722 - Epsilon 0.7017482592537431 - Mean Reward 776.14 - Mean Length 222.25 - Mean Loss 1.161 - Mean Q Value 35.817 - Time Delta 33.866Time 2023-05-29T21:07:35




MarioNet saved to ./checkpoints_pytorch/ mario_net_1420000.chkpt at step 1420000
Episode 40000 - Step 1421688 - Epsilon 0.7008775792673587 - Mean Reward 787.8 - Mean Length 222.99 - Mean Loss 1.18 - Mean Q Value 36.089 - Time Delta 51.865Time 2023-05-29T21:08:27




Episode 40000 - Step 1424942 - Epsilon 0.700307647136629 - Mean Reward 766.09 - Mean Length 214.44 - Mean Loss 1.222 - Mean Q Value 36.315 - Time Delta 33.89Time 2023-05-29T21:09:00




Episode 40000 - Step 1429379 - Epsilon 0.6995312614643937 - Mean Reward 727.19 - Mean Length 195.89 - Mean Loss 1.265 - Mean Q Value 36.297 - Time Delta 45.799Time 2023-05-29T21:09:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_1430000.chkpt at step 1430000
Episode 40000 - Step 1433046 - Epsilon 0.6988902599641885 - Mean Reward 720.13 - Mean Length 195.82 - Mean Loss 1.276 - Mean Q Value 36.295 - Time Delta 38.376Time 2023-05-29T21:10:25




Episode 40000 - Step 1436068 - Epsilon 0.6983624477129178 - Mean Reward 698.26 - Mean Length 193.46 - Mean Loss 1.287 - Mean Q Value 36.128 - Time Delta 31.664Time 2023-05-29T21:10:56




Episode 40000 - Step 1439879 - Epsilon 0.6976973996702 - Mean Reward 697.38 - Mean Length 181.91 - Mean Loss 1.286 - Mean Q Value 36.246 - Time Delta 39.696Time 2023-05-29T21:11:36




MarioNet saved to ./checkpoints_pytorch/ mario_net_1440000.chkpt at step 1440000
Episode 40000 - Step 1443603 - Epsilon 0.6970481455845327 - Mean Reward 714.3 - Mean Length 186.61 - Mean Loss 1.299 - Mean Q Value 36.378 - Time Delta 38.895Time 2023-05-29T21:12:15




Episode 40000 - Step 1446380 - Epsilon 0.6965643877927763 - Mean Reward 693.73 - Mean Length 170.01 - Mean Loss 1.29 - Mean Q Value 36.723 - Time Delta 29.027Time 2023-05-29T21:12:44




Episode 40000 - Step 1449290 - Epsilon 0.6960578214231252 - Mean Reward 692.46 - Mean Length 162.44 - Mean Loss 1.288 - Mean Q Value 37.265 - Time Delta 30.556Time 2023-05-29T21:13:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_1450000.chkpt at step 1450000
Episode 40000 - Step 1452516 - Epsilon 0.6954966770318171 - Mean Reward 709.37 - Mean Length 164.48 - Mean Loss 1.295 - Mean Q Value 38.068 - Time Delta 33.797Time 2023-05-29T21:13:48




Episode 40000 - Step 1456835 - Epsilon 0.6947461196809381 - Mean Reward 681.17 - Mean Length 169.56 - Mean Loss 1.293 - Mean Q Value 38.615 - Time Delta 46.164Time 2023-05-29T21:14:34




MarioNet saved to ./checkpoints_pytorch/ mario_net_1460000.chkpt at step 1460000
Episode 40000 - Step 1461669 - Epsilon 0.6939070260147727 - Mean Reward 684.22 - Mean Length 180.66 - Mean Loss 1.263 - Mean Q Value 38.967 - Time Delta 49.826Time 2023-05-29T21:15:24




Episode 40000 - Step 1466276 - Epsilon 0.6931082785645911 - Mean Reward 715.45 - Mean Length 198.96 - Mean Loss 1.232 - Mean Q Value 39.0 - Time Delta 47.379Time 2023-05-29T21:16:12




Episode 40000 - Step 1469866 - Epsilon 0.6924864928748324 - Mean Reward 727.24 - Mean Length 205.76 - Mean Loss 1.207 - Mean Q Value 38.967 - Time Delta 37.564Time 2023-05-29T21:16:49




MarioNet saved to ./checkpoints_pytorch/ mario_net_1470000.chkpt at step 1470000
Episode 40000 - Step 1473962 - Epsilon 0.6917777495559472 - Mean Reward 763.83 - Mean Length 214.46 - Mean Loss 1.201 - Mean Q Value 38.865 - Time Delta 42.4Time 2023-05-29T21:17:32




Episode 40000 - Step 1476952 - Epsilon 0.6912608388429161 - Mean Reward 762.77 - Mean Length 201.17 - Mean Loss 1.196 - Mean Q Value 38.841 - Time Delta 31.263Time 2023-05-29T21:18:03




MarioNet saved to ./checkpoints_pytorch/ mario_net_1480000.chkpt at step 1480000
Episode 40000 - Step 1480397 - Epsilon 0.6906657466693441 - Mean Reward 740.03 - Mean Length 187.28 - Mean Loss 1.198 - Mean Q Value 38.953 - Time Delta 35.994Time 2023-05-29T21:18:39




Episode 40000 - Step 1483933 - Epsilon 0.6900554678558962 - Mean Reward 744.58 - Mean Length 176.57 - Mean Loss 1.237 - Mean Q Value 39.205 - Time Delta 36.405Time 2023-05-29T21:19:15




Episode 40000 - Step 1487792 - Epsilon 0.6893900577888026 - Mean Reward 737.66 - Mean Length 179.26 - Mean Loss 1.25 - Mean Q Value 39.347 - Time Delta 39.819Time 2023-05-29T21:19:55




MarioNet saved to ./checkpoints_pytorch/ mario_net_1490000.chkpt at step 1490000
Episode 40000 - Step 1491013 - Epsilon 0.6888351498251329 - Mean Reward 685.43 - Mean Length 170.51 - Mean Loss 1.242 - Mean Q Value 39.297 - Time Delta 33.77Time 2023-05-29T21:20:29




Episode 40000 - Step 1495371 - Epsilon 0.6880850725143444 - Mean Reward 727.2 - Mean Length 184.19 - Mean Loss 1.257 - Mean Q Value 38.984 - Time Delta 45.293Time 2023-05-29T21:21:14




Episode 40000 - Step 1499331 - Epsilon 0.68740420529224 - Mean Reward 715.9 - Mean Length 189.34 - Mean Loss 1.267 - Mean Q Value 38.786 - Time Delta 41.198Time 2023-05-29T21:21:55




MarioNet saved to ./checkpoints_pytorch/ mario_net_1500000.chkpt at step 1500000
Episode 40000 - Step 1503085 - Epsilon 0.6867593789969468 - Mean Reward 727.07 - Mean Length 191.52 - Mean Loss 1.259 - Mean Q Value 38.928 - Time Delta 39.16Time 2023-05-29T21:22:34




Episode 40000 - Step 1506800 - Epsilon 0.6861218472435593 - Mean Reward 717.3 - Mean Length 190.08 - Mean Loss 1.263 - Mean Q Value 39.225 - Time Delta 38.587Time 2023-05-29T21:23:13




Episode 40000 - Step 1509307 - Epsilon 0.6856919550535412 - Mean Reward 688.23 - Mean Length 182.94 - Mean Loss 1.287 - Mean Q Value 39.502 - Time Delta 25.843Time 2023-05-29T21:23:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_1510000.chkpt at step 1510000
Episode 40000 - Step 1512633 - Epsilon 0.6851220390969934 - Mean Reward 662.59 - Mean Length 172.62 - Mean Loss 1.285 - Mean Q Value 40.002 - Time Delta 34.889Time 2023-05-29T21:24:14




Episode 40000 - Step 1516140 - Epsilon 0.6845216215205925 - Mean Reward 677.35 - Mean Length 168.09 - Mean Loss 1.296 - Mean Q Value 40.397 - Time Delta 36.7Time 2023-05-29T21:24:50




Episode 40000 - Step 1519814 - Epsilon 0.6838931769899138 - Mean Reward 665.06 - Mean Length 167.29 - Mean Loss 1.296 - Mean Q Value 40.521 - Time Delta 38.006Time 2023-05-29T21:25:28




MarioNet saved to ./checkpoints_pytorch/ mario_net_1520000.chkpt at step 1520000
Episode 40000 - Step 1522930 - Epsilon 0.6833606315917585 - Mean Reward 658.83 - Mean Length 161.3 - Mean Loss 1.302 - Mean Q Value 40.422 - Time Delta 32.764Time 2023-05-29T21:26:01




Episode 40000 - Step 1526656 - Epsilon 0.6827243774651739 - Mean Reward 706.54 - Mean Length 173.49 - Mean Loss 1.301 - Mean Q Value 40.292 - Time Delta 38.769Time 2023-05-29T21:26:40




Episode 40000 - Step 1529079 - Epsilon 0.6823109423535886 - Mean Reward 670.71 - Mean Length 164.46 - Mean Loss 1.303 - Mean Q Value 40.174 - Time Delta 25.189Time 2023-05-29T21:27:05




MarioNet saved to ./checkpoints_pytorch/ mario_net_1530000.chkpt at step 1530000
Episode 40000 - Step 1532370 - Epsilon 0.6817498018263773 - Mean Reward 672.56 - Mean Length 162.3 - Mean Loss 1.309 - Mean Q Value 40.162 - Time Delta 33.669Time 2023-05-29T21:27:39




Episode 40000 - Step 1536044 - Epsilon 0.6811239020435538 - Mean Reward 645.23 - Mean Length 162.3 - Mean Loss 1.312 - Mean Q Value 40.272 - Time Delta 38.34Time 2023-05-29T21:28:17




MarioNet saved to ./checkpoints_pytorch/ mario_net_1540000.chkpt at step 1540000
Episode 40000 - Step 1540723 - Epsilon 0.6803276230739471 - Mean Reward 658.48 - Mean Length 177.93 - Mean Loss 1.304 - Mean Q Value 40.355 - Time Delta 48.85Time 2023-05-29T21:29:06




Episode 40000 - Step 1544417 - Epsilon 0.6796996304556753 - Mean Reward 655.97 - Mean Length 177.61 - Mean Loss 1.309 - Mean Q Value 40.429 - Time Delta 38.238Time 2023-05-29T21:29:44




Episode 40000 - Step 1547641 - Epsilon 0.679152013204397 - Mean Reward 682.59 - Mean Length 185.62 - Mean Loss 1.288 - Mean Q Value 40.458 - Time Delta 33.607Time 2023-05-29T21:30:18




MarioNet saved to ./checkpoints_pytorch/ mario_net_1550000.chkpt at step 1550000
Episode 40000 - Step 1551811 - Epsilon 0.6784443660672372 - Mean Reward 714.92 - Mean Length 194.41 - Mean Loss 1.281 - Mean Q Value 40.362 - Time Delta 43.702Time 2023-05-29T21:31:02




Episode 40000 - Step 1555201 - Epsilon 0.6778696279747418 - Mean Reward 748.56 - Mean Length 191.57 - Mean Loss 1.271 - Mean Q Value 40.184 - Time Delta 35.116Time 2023-05-29T21:31:37




Episode 40000 - Step 1559353 - Epsilon 0.6771663642699037 - Mean Reward 763.2 - Mean Length 186.3 - Mean Loss 1.274 - Mean Q Value 40.214 - Time Delta 43.466Time 2023-05-29T21:32:20




MarioNet saved to ./checkpoints_pytorch/ mario_net_1560000.chkpt at step 1560000
Episode 40000 - Step 1562854 - Epsilon 0.676573933635716 - Mean Reward 770.9 - Mean Length 184.37 - Mean Loss 1.268 - Mean Q Value 40.292 - Time Delta 36.419Time 2023-05-29T21:32:57




Episode 40000 - Step 1566133 - Epsilon 0.6760195393476446 - Mean Reward 777.85 - Mean Length 184.92 - Mean Loss 1.295 - Mean Q Value 40.245 - Time Delta 33.462Time 2023-05-29T21:33:30




Episode 40000 - Step 1568655 - Epsilon 0.6755934433156536 - Mean Reward 731.6 - Mean Length 168.44 - Mean Loss 1.282 - Mean Q Value 40.349 - Time Delta 26.358Time 2023-05-29T21:33:56




MarioNet saved to ./checkpoints_pytorch/ mario_net_1570000.chkpt at step 1570000
Episode 40000 - Step 1572521 - Epsilon 0.6749407976128606 - Mean Reward 733.2 - Mean Length 173.2 - Mean Loss 1.286 - Mean Q Value 40.374 - Time Delta 40.311Time 2023-05-29T21:34:37




Episode 40000 - Step 1577184 - Epsilon 0.674154443714719 - Mean Reward 744.39 - Mean Length 178.31 - Mean Loss 1.302 - Mean Q Value 40.295 - Time Delta 48.214Time 2023-05-29T21:35:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_1580000.chkpt at step 1580000
Episode 40000 - Step 1581557 - Epsilon 0.6734178270034238 - Mean Reward 746.29 - Mean Length 187.03 - Mean Loss 1.309 - Mean Q Value 40.361 - Time Delta 45.294Time 2023-05-29T21:36:10




Episode 40000 - Step 1584975 - Epsilon 0.6728426371831238 - Mean Reward 755.17 - Mean Length 188.42 - Mean Loss 1.311 - Mean Q Value 40.49 - Time Delta 35.759Time 2023-05-29T21:36:46




Episode 40000 - Step 1588182 - Epsilon 0.6723034017261819 - Mean Reward 776.88 - Mean Length 195.27 - Mean Loss 1.336 - Mean Q Value 40.621 - Time Delta 33.371Time 2023-05-29T21:37:19




MarioNet saved to ./checkpoints_pytorch/ mario_net_1590000.chkpt at step 1590000
Episode 40000 - Step 1592307 - Epsilon 0.6716104461230794 - Mean Reward 769.47 - Mean Length 197.86 - Mean Loss 1.357 - Mean Q Value 40.831 - Time Delta 42.976Time 2023-05-29T21:38:02




Episode 40000 - Step 1595856 - Epsilon 0.6710148239521823 - Mean Reward 752.87 - Mean Length 186.72 - Mean Loss 1.355 - Mean Q Value 40.91 - Time Delta 37.07Time 2023-05-29T21:38:39




Episode 40000 - Step 1599262 - Epsilon 0.6704436979495186 - Mean Reward 747.79 - Mean Length 177.05 - Mean Loss 1.354 - Mean Q Value 40.971 - Time Delta 34.95Time 2023-05-29T21:39:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_1600000.chkpt at step 1600000
Episode 40000 - Step 1602885 - Epsilon 0.669836718421493 - Mean Reward 735.49 - Mean Length 179.1 - Mean Loss 1.399 - Mean Q Value 41.061 - Time Delta 37.204Time 2023-05-29T21:39:52




Episode 40000 - Step 1607599 - Epsilon 0.6690477807229563 - Mean Reward 767.93 - Mean Length 194.17 - Mean Loss 1.418 - Mean Q Value 40.916 - Time Delta 48.936Time 2023-05-29T21:40:41




MarioNet saved to ./checkpoints_pytorch/ mario_net_1610000.chkpt at step 1610000
Episode 40000 - Step 1611376 - Epsilon 0.6684163304473685 - Mean Reward 778.31 - Mean Length 190.69 - Mean Loss 1.448 - Mean Q Value 40.685 - Time Delta 39.368Time 2023-05-29T21:41:20




Episode 40000 - Step 1615226 - Epsilon 0.6677732891621027 - Mean Reward 775.64 - Mean Length 193.7 - Mean Loss 1.452 - Mean Q Value 40.548 - Time Delta 39.833Time 2023-05-29T21:42:00




Episode 40000 - Step 1617901 - Epsilon 0.6673268650088248 - Mean Reward 752.72 - Mean Length 186.39 - Mean Loss 1.453 - Mean Q Value 40.435 - Time Delta 28.238Time 2023-05-29T21:42:28




MarioNet saved to ./checkpoints_pytorch/ mario_net_1620000.chkpt at step 1620000
Episode 40000 - Step 1620662 - Epsilon 0.6668664015183734 - Mean Reward 748.98 - Mean Length 177.77 - Mean Loss 1.412 - Mean Q Value 40.426 - Time Delta 28.982Time 2023-05-29T21:42:57




Episode 40000 - Step 1624430 - Epsilon 0.6662385090733408 - Mean Reward 718.52 - Mean Length 168.31 - Mean Loss 1.391 - Mean Q Value 40.706 - Time Delta 38.933Time 2023-05-29T21:43:36




Episode 40000 - Step 1629650 - Epsilon 0.6653696347740621 - Mean Reward 718.0 - Mean Length 182.74 - Mean Loss 1.36 - Mean Q Value 40.9 - Time Delta 51.438Time 2023-05-29T21:44:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_1630000.chkpt at step 1630000
Episode 40000 - Step 1633045 - Epsilon 0.6648051418164939 - Mean Reward 713.62 - Mean Length 178.19 - Mean Loss 1.377 - Mean Q Value 41.224 - Time Delta 35.429Time 2023-05-29T21:45:03




Episode 40000 - Step 1636680 - Epsilon 0.6642012744920753 - Mean Reward 731.28 - Mean Length 187.79 - Mean Loss 1.392 - Mean Q Value 41.318 - Time Delta 37.346Time 2023-05-29T21:45:40




MarioNet saved to ./checkpoints_pytorch/ mario_net_1640000.chkpt at step 1640000
Episode 40000 - Step 1640450 - Epsilon 0.6635755596270138 - Mean Reward 763.64 - Mean Length 197.88 - Mean Loss 1.402 - Mean Q Value 41.333 - Time Delta 39.069Time 2023-05-29T21:46:19




Episode 40000 - Step 1643070 - Epsilon 0.6631410598952789 - Mean Reward 740.3 - Mean Length 186.4 - Mean Loss 1.414 - Mean Q Value 41.238 - Time Delta 27.27Time 2023-05-29T21:46:46




Episode 40000 - Step 1645477 - Epsilon 0.6627421347512048 - Mean Reward 700.1 - Mean Length 158.27 - Mean Loss 1.408 - Mean Q Value 41.566 - Time Delta 24.928Time 2023-05-29T21:47:11




Episode 40000 - Step 1648837 - Epsilon 0.6621856650383742 - Mean Reward 703.55 - Mean Length 157.92 - Mean Loss 1.394 - Mean Q Value 41.738 - Time Delta 34.776Time 2023-05-29T21:47:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_1650000.chkpt at step 1650000
Episode 40000 - Step 1653086 - Epsilon 0.6614826316924034 - Mean Reward 712.49 - Mean Length 164.06 - Mean Loss 1.381 - Mean Q Value 41.968 - Time Delta 44.781Time 2023-05-29T21:48:31




Episode 40000 - Step 1656668 - Epsilon 0.6608905390706676 - Mean Reward 698.47 - Mean Length 162.18 - Mean Loss 1.375 - Mean Q Value 42.225 - Time Delta 37.526Time 2023-05-29T21:49:08




Episode 40000 - Step 1659722 - Epsilon 0.6603861416591821 - Mean Reward 697.54 - Mean Length 166.52 - Mean Loss 1.361 - Mean Q Value 42.362 - Time Delta 31.516Time 2023-05-29T21:49:40




MarioNet saved to ./checkpoints_pytorch/ mario_net_1660000.chkpt at step 1660000
Episode 40000 - Step 1662331 - Epsilon 0.6599555451879393 - Mean Reward 699.85 - Mean Length 168.54 - Mean Loss 1.361 - Mean Q Value 42.139 - Time Delta 27.245Time 2023-05-29T21:50:07




Episode 40000 - Step 1665930 - Epsilon 0.6593620171653438 - Mean Reward 699.97 - Mean Length 170.93 - Mean Loss 1.334 - Mean Q Value 42.09 - Time Delta 37.62Time 2023-05-29T21:50:45




MarioNet saved to ./checkpoints_pytorch/ mario_net_1670000.chkpt at step 1670000
Episode 40000 - Step 1671121 - Epsilon 0.6585068849944586 - Mean Reward 677.21 - Mean Length 180.35 - Mean Loss 1.331 - Mean Q Value 41.921 - Time Delta 53.185Time 2023-05-29T21:51:38




Episode 40000 - Step 1674363 - Epsilon 0.6579733813289734 - Mean Reward 649.61 - Mean Length 176.95 - Mean Loss 1.323 - Mean Q Value 41.63 - Time Delta 33.918Time 2023-05-29T21:52:12




Episode 40000 - Step 1678141 - Epsilon 0.6573522187827663 - Mean Reward 659.18 - Mean Length 184.19 - Mean Loss 1.314 - Mean Q Value 41.255 - Time Delta 39.225Time 2023-05-29T21:52:51




MarioNet saved to ./checkpoints_pytorch/ mario_net_1680000.chkpt at step 1680000
Episode 40000 - Step 1682297 - Epsilon 0.6566695844319795 - Mean Reward 688.55 - Mean Length 199.66 - Mean Loss 1.315 - Mean Q Value 40.924 - Time Delta 42.887Time 2023-05-29T21:53:34




Episode 40000 - Step 1685072 - Epsilon 0.6562141778382171 - Mean Reward 661.04 - Mean Length 191.42 - Mean Loss 1.32 - Mean Q Value 40.289 - Time Delta 29.022Time 2023-05-29T21:54:03




Episode 40000 - Step 1689334 - Epsilon 0.6555153539088422 - Mean Reward 675.29 - Mean Length 182.13 - Mean Loss 1.304 - Mean Q Value 39.63 - Time Delta 44.468Time 2023-05-29T21:54:48




MarioNet saved to ./checkpoints_pytorch/ mario_net_1690000.chkpt at step 1690000
Episode 40000 - Step 1692002 - Epsilon 0.6550782708964795 - Mean Reward 673.38 - Mean Length 176.39 - Mean Loss 1.299 - Mean Q Value 39.213 - Time Delta 27.868Time 2023-05-29T21:55:15




Episode 40000 - Step 1696133 - Epsilon 0.6544020879516628 - Mean Reward 704.84 - Mean Length 179.92 - Mean Loss 1.303 - Mean Q Value 38.91 - Time Delta 42.854Time 2023-05-29T21:55:58




Episode 40000 - Step 1699853 - Epsilon 0.6537937768422527 - Mean Reward 704.33 - Mean Length 175.56 - Mean Loss 1.285 - Mean Q Value 38.646 - Time Delta 38.786Time 2023-05-29T21:56:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_1700000.chkpt at step 1700000
Episode 40000 - Step 1704439 - Epsilon 0.6530446317128575 - Mean Reward 762.93 - Mean Length 193.67 - Mean Loss 1.298 - Mean Q Value 38.44 - Time Delta 47.325Time 2023-05-29T21:57:24




Episode 40000 - Step 1707917 - Epsilon 0.6524770561238034 - Mean Reward 774.46 - Mean Length 185.83 - Mean Loss 1.302 - Mean Q Value 38.466 - Time Delta 35.922Time 2023-05-29T21:58:00




MarioNet saved to ./checkpoints_pytorch/ mario_net_1710000.chkpt at step 1710000
Episode 40000 - Step 1711803 - Epsilon 0.6518434823927596 - Mean Reward 814.11 - Mean Length 198.01 - Mean Loss 1.314 - Mean Q Value 38.629 - Time Delta 40.805Time 2023-05-29T21:58:41




Episode 40000 - Step 1715238 - Epsilon 0.6512839520149765 - Mean Reward 800.55 - Mean Length 191.05 - Mean Loss 1.317 - Mean Q Value 38.95 - Time Delta 35.629Time 2023-05-29T21:59:17




Episode 40000 - Step 1718733 - Epsilon 0.650715141126167 - Mean Reward 807.11 - Mean Length 188.8 - Mean Loss 1.336 - Mean Q Value 39.367 - Time Delta 36.244Time 2023-05-29T21:59:53




MarioNet saved to ./checkpoints_pytorch/ mario_net_1720000.chkpt at step 1720000
Episode 40000 - Step 1723384 - Epsilon 0.6499589617102011 - Mean Reward 796.64 - Mean Length 189.45 - Mean Loss 1.335 - Mean Q Value 39.91 - Time Delta 48.619Time 2023-05-29T22:00:42




Episode 40000 - Step 1727089 - Epsilon 0.6493572158731756 - Mean Reward 793.97 - Mean Length 191.72 - Mean Loss 1.357 - Mean Q Value 40.447 - Time Delta 38.801Time 2023-05-29T22:01:20




MarioNet saved to ./checkpoints_pytorch/ mario_net_1730000.chkpt at step 1730000
Episode 40000 - Step 1730854 - Epsilon 0.6487462958766127 - Mean Reward 760.4 - Mean Length 190.51 - Mean Loss 1.367 - Mean Q Value 40.664 - Time Delta 38.945Time 2023-05-29T22:01:59




Episode 40000 - Step 1733949 - Epsilon 0.6482445225159891 - Mean Reward 751.49 - Mean Length 187.11 - Mean Loss 1.359 - Mean Q Value 40.719 - Time Delta 32.33Time 2023-05-29T22:02:32




Episode 40000 - Step 1738116 - Epsilon 0.6475695653295596 - Mean Reward 747.02 - Mean Length 193.83 - Mean Loss 1.342 - Mean Q Value 40.584 - Time Delta 43.037Time 2023-05-29T22:03:15




MarioNet saved to ./checkpoints_pytorch/ mario_net_1740000.chkpt at step 1740000
Episode 40000 - Step 1741969 - Epsilon 0.6469460941952276 - Mean Reward 724.4 - Mean Length 185.85 - Mean Loss 1.337 - Mean Q Value 40.538 - Time Delta 39.798Time 2023-05-29T22:03:54




Episode 40000 - Step 1746148 - Epsilon 0.6462705501275536 - Mean Reward 720.58 - Mean Length 190.59 - Mean Loss 1.307 - Mean Q Value 40.414 - Time Delta 43.5Time 2023-05-29T22:04:38




Episode 40000 - Step 1749556 - Epsilon 0.6457201620485858 - Mean Reward 727.68 - Mean Length 187.02 - Mean Loss 1.279 - Mean Q Value 40.238 - Time Delta 35.812Time 2023-05-29T22:05:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_1750000.chkpt at step 1750000
Episode 40000 - Step 1753314 - Epsilon 0.6451137927670312 - Mean Reward 748.09 - Mean Length 193.65 - Mean Loss 1.29 - Mean Q Value 40.042 - Time Delta 39.6Time 2023-05-29T22:05:53




Episode 40000 - Step 1757204 - Epsilon 0.6444867244864703 - Mean Reward 736.74 - Mean Length 190.88 - Mean Loss 1.292 - Mean Q Value 39.877 - Time Delta 41.138Time 2023-05-29T22:06:35




MarioNet saved to ./checkpoints_pytorch/ mario_net_1760000.chkpt at step 1760000
Episode 40000 - Step 1760492 - Epsilon 0.6439571740077387 - Mean Reward 722.0 - Mean Length 185.23 - Mean Loss 1.287 - Mean Q Value 39.733 - Time Delta 34.408Time 2023-05-29T22:07:09




Episode 40000 - Step 1763600 - Epsilon 0.643457013558444 - Mean Reward 710.49 - Mean Length 174.52 - Mean Loss 1.324 - Mean Q Value 39.573 - Time Delta 32.268Time 2023-05-29T22:07:41




Episode 40000 - Step 1767576 - Epsilon 0.6428177349810548 - Mean Reward 739.97 - Mean Length 180.2 - Mean Loss 1.353 - Mean Q Value 39.485 - Time Delta 41.357Time 2023-05-29T22:08:23




MarioNet saved to ./checkpoints_pytorch/ mario_net_1770000.chkpt at step 1770000
Episode 40000 - Step 1770682 - Epsilon 0.6423187806914763 - Mean Reward 723.5 - Mean Length 173.68 - Mean Loss 1.379 - Mean Q Value 39.716 - Time Delta 32.347Time 2023-05-29T22:08:55




Episode 40000 - Step 1774652 - Epsilon 0.6416815954773695 - Mean Reward 743.02 - Mean Length 174.48 - Mean Loss 1.422 - Mean Q Value 40.014 - Time Delta 40.624Time 2023-05-29T22:09:36




Episode 40000 - Step 1778157 - Epsilon 0.6411195681831716 - Mean Reward 757.61 - Mean Length 176.65 - Mean Loss 1.448 - Mean Q Value 40.248 - Time Delta 36.606Time 2023-05-29T22:10:12




MarioNet saved to ./checkpoints_pytorch/ mario_net_1780000.chkpt at step 1780000
Episode 40000 - Step 1782293 - Epsilon 0.640456993077118 - Mean Reward 783.58 - Mean Length 186.93 - Mean Loss 1.432 - Mean Q Value 40.507 - Time Delta 43.389Time 2023-05-29T22:10:56




Episode 40000 - Step 1785211 - Epsilon 0.6399899500168894 - Mean Reward 755.26 - Mean Length 176.35 - Mean Loss 1.433 - Mean Q Value 40.81 - Time Delta 30.229Time 2023-05-29T22:11:26




Episode 40000 - Step 1789817 - Epsilon 0.6392534256325719 - Mean Reward 755.52 - Mean Length 191.35 - Mean Loss 1.415 - Mean Q Value 40.841 - Time Delta 47.695Time 2023-05-29T22:12:13




MarioNet saved to ./checkpoints_pytorch/ mario_net_1790000.chkpt at step 1790000
Episode 40000 - Step 1794341 - Epsilon 0.6385308386177396 - Mean Reward 754.32 - Mean Length 196.89 - Mean Loss 1.385 - Mean Q Value 40.744 - Time Delta 47.166Time 2023-05-29T22:13:01




Episode 40000 - Step 1797439 - Epsilon 0.6380364878834014 - Mean Reward 750.79 - Mean Length 192.82 - Mean Loss 1.361 - Mean Q Value 40.583 - Time Delta 32.004Time 2023-05-29T22:13:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_1800000.chkpt at step 1800000
Episode 40000 - Step 1801633 - Epsilon 0.6373678571331104 - Mean Reward 732.03 - Mean Length 193.4 - Mean Loss 1.335 - Mean Q Value 40.453 - Time Delta 43.883Time 2023-05-29T22:14:16




Episode 40000 - Step 1804948 - Epsilon 0.6368398572755964 - Mean Reward 740.55 - Mean Length 197.37 - Mean Loss 1.302 - Mean Q Value 40.232 - Time Delta 34.427Time 2023-05-29T22:14:51




Episode 40000 - Step 1808392 - Epsilon 0.6362917740735718 - Mean Reward 762.73 - Mean Length 185.75 - Mean Loss 1.279 - Mean Q Value 40.127 - Time Delta 35.648Time 2023-05-29T22:15:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_1810000.chkpt at step 1810000
Episode 40000 - Step 1814174 - Epsilon 0.6353726786353983 - Mean Reward 787.92 - Mean Length 198.33 - Mean Loss 1.27 - Mean Q Value 39.812 - Time Delta 60.098Time 2023-05-29T22:16:27




Episode 40000 - Step 1817546 - Epsilon 0.6348372851003922 - Mean Reward 796.82 - Mean Length 201.07 - Mean Loss 1.264 - Mean Q Value 39.527 - Time Delta 35.142Time 2023-05-29T22:17:02




MarioNet saved to ./checkpoints_pytorch/ mario_net_1820000.chkpt at step 1820000
Episode 40000 - Step 1820776 - Epsilon 0.634324860847685 - Mean Reward 776.4 - Mean Length 191.43 - Mean Loss 1.287 - Mean Q Value 39.3 - Time Delta 33.472Time 2023-05-29T22:17:35




Episode 40000 - Step 1823975 - Epsilon 0.633817762279834 - Mean Reward 754.64 - Mean Length 190.27 - Mean Loss 1.305 - Mean Q Value 39.226 - Time Delta 33.7Time 2023-05-29T22:18:09




Episode 40000 - Step 1828504 - Epsilon 0.6331005281495351 - Mean Reward 752.01 - Mean Length 201.12 - Mean Loss 1.304 - Mean Q Value 38.916 - Time Delta 47.521Time 2023-05-29T22:18:57




MarioNet saved to ./checkpoints_pytorch/ mario_net_1830000.chkpt at step 1830000
Episode 40000 - Step 1832679 - Epsilon 0.6324400741250328 - Mean Reward 728.72 - Mean Length 185.05 - Mean Loss 1.317 - Mean Q Value 39.092 - Time Delta 43.382Time 2023-05-29T22:19:40




Episode 40000 - Step 1836297 - Epsilon 0.6318682906344976 - Mean Reward 724.64 - Mean Length 187.51 - Mean Loss 1.334 - Mean Q Value 39.271 - Time Delta 37.883Time 2023-05-29T22:20:18




Episode 40000 - Step 1838844 - Epsilon 0.6314660765186124 - Mean Reward 721.62 - Mean Length 180.68 - Mean Loss 1.351 - Mean Q Value 39.461 - Time Delta 26.755Time 2023-05-29T22:20:45




MarioNet saved to ./checkpoints_pytorch/ mario_net_1840000.chkpt at step 1840000
Episode 40000 - Step 1842593 - Epsilon 0.6308745121295332 - Mean Reward 741.09 - Mean Length 186.18 - Mean Loss 1.395 - Mean Q Value 39.689 - Time Delta 38.65Time 2023-05-29T22:21:23




Episode 40000 - Step 1846520 - Epsilon 0.6302554549292076 - Mean Reward 717.04 - Mean Length 180.16 - Mean Loss 1.455 - Mean Q Value 39.909 - Time Delta 40.605Time 2023-05-29T22:22:04




MarioNet saved to ./checkpoints_pytorch/ mario_net_1850000.chkpt at step 1850000
Episode 40000 - Step 1850027 - Epsilon 0.6297031205549434 - Mean Reward 712.39 - Mean Length 173.48 - Mean Loss 1.481 - Mean Q Value 39.932 - Time Delta 36.669Time 2023-05-29T22:22:40




Episode 40000 - Step 1853760 - Epsilon 0.6291157241806471 - Mean Reward 712.77 - Mean Length 174.63 - Mean Loss 1.554 - Mean Q Value 40.075 - Time Delta 38.727Time 2023-05-29T22:23:19




Episode 40000 - Step 1857262 - Epsilon 0.6285651743335938 - Mean Reward 733.51 - Mean Length 184.18 - Mean Loss 1.611 - Mean Q Value 40.147 - Time Delta 36.405Time 2023-05-29T22:23:56




Episode 40000 - Step 1859853 - Epsilon 0.6281581530292423 - Mean Reward 711.25 - Mean Length 172.6 - Mean Loss 1.628 - Mean Q Value 40.048 - Time Delta 27.385Time 2023-05-29T22:24:23




MarioNet saved to ./checkpoints_pytorch/ mario_net_1860000.chkpt at step 1860000
Episode 40000 - Step 1863346 - Epsilon 0.6276098532897117 - Mean Reward 718.46 - Mean Length 168.26 - Mean Loss 1.669 - Mean Q Value 40.173 - Time Delta 36.638Time 2023-05-29T22:25:00




Episode 40000 - Step 1866264 - Epsilon 0.6271521788015306 - Mean Reward 691.74 - Mean Length 162.37 - Mean Loss 1.707 - Mean Q Value 40.457 - Time Delta 29.997Time 2023-05-29T22:25:30




Episode 40000 - Step 1869233 - Epsilon 0.6266868477559352 - Mean Reward 680.42 - Mean Length 154.73 - Mean Loss 1.686 - Mean Q Value 40.759 - Time Delta 30.893Time 2023-05-29T22:26:00




MarioNet saved to ./checkpoints_pytorch/ mario_net_1870000.chkpt at step 1870000
Episode 40000 - Step 1872506 - Epsilon 0.6261742709152129 - Mean Reward 691.24 - Mean Length 152.44 - Mean Loss 1.689 - Mean Q Value 41.177 - Time Delta 34.01Time 2023-05-29T22:26:35




Episode 40000 - Step 1876413 - Epsilon 0.6255629537202323 - Mean Reward 738.39 - Mean Length 165.6 - Mean Loss 1.676 - Mean Q Value 41.678 - Time Delta 40.397Time 2023-05-29T22:27:15




Episode 40000 - Step 1879447 - Epsilon 0.6250886440651605 - Mean Reward 730.64 - Mean Length 161.01 - Mean Loss 1.639 - Mean Q Value 42.094 - Time Delta 31.217Time 2023-05-29T22:27:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_1880000.chkpt at step 1880000
Episode 40000 - Step 1883499 - Epsilon 0.6244557498045852 - Mean Reward 757.45 - Mean Length 172.35 - Mean Loss 1.629 - Mean Q Value 42.336 - Time Delta 42.143Time 2023-05-29T22:28:28




Episode 40000 - Step 1886682 - Epsilon 0.6239590367354445 - Mean Reward 753.53 - Mean Length 174.49 - Mean Loss 1.602 - Mean Q Value 42.601 - Time Delta 33.205Time 2023-05-29T22:29:01




MarioNet saved to ./checkpoints_pytorch/ mario_net_1890000.chkpt at step 1890000
Episode 40000 - Step 1890132 - Epsilon 0.623421104015772 - Mean Reward 757.4 - Mean Length 176.26 - Mean Loss 1.562 - Mean Q Value 42.789 - Time Delta 36.256Time 2023-05-29T22:29:38




Episode 40000 - Step 1893657 - Epsilon 0.6228719561029777 - Mean Reward 742.93 - Mean Length 172.44 - Mean Loss 1.542 - Mean Q Value 42.973 - Time Delta 36.896Time 2023-05-29T22:30:15




Episode 40000 - Step 1897241 - Epsilon 0.6223141127116607 - Mean Reward 746.37 - Mean Length 177.94 - Mean Loss 1.524 - Mean Q Value 43.142 - Time Delta 37.635Time 2023-05-29T22:30:52




MarioNet saved to ./checkpoints_pytorch/ mario_net_1900000.chkpt at step 1900000
Episode 40000 - Step 1900265 - Epsilon 0.6218438209762738 - Mean Reward 721.36 - Mean Length 167.66 - Mean Loss 1.478 - Mean Q Value 43.322 - Time Delta 42.68Time 2023-05-29T22:31:35




Episode 40000 - Step 1904605 - Epsilon 0.6211694862388771 - Mean Reward 766.2 - Mean Length 179.23 - Mean Loss 1.495 - Mean Q Value 43.402 - Time Delta 36.797Time 2023-05-29T22:32:12




Episode 40000 - Step 1908124 - Epsilon 0.6206232526247439 - Mean Reward 742.95 - Mean Length 179.92 - Mean Loss 1.498 - Mean Q Value 43.322 - Time Delta 40.883Time 2023-05-29T22:32:53




MarioNet saved to ./checkpoints_pytorch/ mario_net_1910000.chkpt at step 1910000
Episode 40000 - Step 1911036 - Epsilon 0.6201716032606295 - Mean Reward 731.92 - Mean Length 173.79 - Mean Loss 1.557 - Mean Q Value 43.011 - Time Delta 34.07Time 2023-05-29T22:33:27




Episode 40000 - Step 1915447 - Epsilon 0.619488085883257 - Mean Reward 743.61 - Mean Length 182.06 - Mean Loss 1.631 - Mean Q Value 42.795 - Time Delta 128.004Time 2023-05-29T22:35:35




Episode 40000 - Step 1919640 - Epsilon 0.6188390476526041 - Mean Reward 764.16 - Mean Length 193.75 - Mean Loss 1.708 - Mean Q Value 42.497 - Time Delta 29.718Time 2023-05-29T22:36:04




MarioNet saved to ./checkpoints_pytorch/ mario_net_1920000.chkpt at step 1920000
Episode 40000 - Step 1922866 - Epsilon 0.6183401551037029 - Mean Reward 723.56 - Mean Length 182.61 - Mean Loss 1.776 - Mean Q Value 41.986 - Time Delta 26.942Time 2023-05-29T22:36:31




Episode 40000 - Step 1925814 - Epsilon 0.6178846062427594 - Mean Reward 712.52 - Mean Length 176.9 - Mean Loss 1.832 - Mean Q Value 41.609 - Time Delta 24.684Time 2023-05-29T22:36:56




Episode 40000 - Step 1929313 - Epsilon 0.6173443479457648 - Mean Reward 698.58 - Mean Length 182.77 - Mean Loss 1.828 - Mean Q Value 41.429 - Time Delta 29.353Time 2023-05-29T22:37:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_1930000.chkpt at step 1930000
Episode 40000 - Step 1932468 - Epsilon 0.616857609513117 - Mean Reward 678.41 - Mean Length 170.21 - Mean Loss 1.793 - Mean Q Value 41.092 - Time Delta 26.419Time 2023-05-29T22:37:52




Episode 40000 - Step 1935653 - Epsilon 0.616366632076435 - Mean Reward 674.32 - Mean Length 160.13 - Mean Loss 1.772 - Mean Q Value 40.939 - Time Delta 26.843Time 2023-05-29T22:38:19




Episode 40000 - Step 1938522 - Epsilon 0.6159247015605512 - Mean Reward 660.69 - Mean Length 156.56 - Mean Loss 1.724 - Mean Q Value 41.048 - Time Delta 23.905Time 2023-05-29T22:38:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_1940000.chkpt at step 1940000
Episode 40000 - Step 1941736 - Epsilon 0.6154300047719913 - Mean Reward 666.9 - Mean Length 159.22 - Mean Loss 1.708 - Mean Q Value 41.144 - Time Delta 26.651Time 2023-05-29T22:39:09




Episode 40000 - Step 1944771 - Epsilon 0.6149632243046924 - Mean Reward 676.15 - Mean Length 154.58 - Mean Loss 1.697 - Mean Q Value 41.261 - Time Delta 25.558Time 2023-05-29T22:39:35




Episode 40000 - Step 1947696 - Epsilon 0.614513696769198 - Mean Reward 671.15 - Mean Length 152.28 - Mean Loss 1.693 - Mean Q Value 41.491 - Time Delta 24.925Time 2023-05-29T22:40:00




MarioNet saved to ./checkpoints_pytorch/ mario_net_1950000.chkpt at step 1950000
Episode 40000 - Step 1951977 - Epsilon 0.6138563652202618 - Mean Reward 659.28 - Mean Length 163.24 - Mean Loss 1.69 - Mean Q Value 41.597 - Time Delta 36.412Time 2023-05-29T22:40:36




Episode 40000 - Step 1954693 - Epsilon 0.613439698170608 - Mean Reward 673.83 - Mean Length 161.71 - Mean Loss 1.679 - Mean Q Value 41.578 - Time Delta 21.617Time 2023-05-29T22:40:58




Episode 40000 - Step 1958322 - Epsilon 0.6128834073204199 - Mean Reward 695.23 - Mean Length 165.86 - Mean Loss 1.627 - Mean Q Value 41.646 - Time Delta 29.618Time 2023-05-29T22:41:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_1960000.chkpt at step 1960000
Episode 40000 - Step 1960853 - Epsilon 0.6124957279609116 - Mean Reward 686.7 - Mean Length 160.82 - Mean Loss 1.585 - Mean Q Value 41.769 - Time Delta 19.477Time 2023-05-29T22:41:47




Episode 40000 - Step 1965002 - Epsilon 0.6118607460620423 - Mean Reward 711.09 - Mean Length 173.06 - Mean Loss 1.569 - Mean Q Value 41.979 - Time Delta 33.551Time 2023-05-29T22:42:20




Episode 40000 - Step 1968013 - Epsilon 0.6114003411344742 - Mean Reward 708.89 - Mean Length 160.36 - Mean Loss 1.529 - Mean Q Value 42.143 - Time Delta 21.937Time 2023-05-29T22:42:42




MarioNet saved to ./checkpoints_pytorch/ mario_net_1970000.chkpt at step 1970000
Episode 40000 - Step 1972615 - Epsilon 0.6106973294390484 - Mean Reward 713.05 - Mean Length 179.22 - Mean Loss 1.512 - Mean Q Value 42.309 - Time Delta 33.665Time 2023-05-29T22:43:16




Episode 40000 - Step 1975118 - Epsilon 0.6103153050756835 - Mean Reward 683.19 - Mean Length 167.96 - Mean Loss 1.523 - Mean Q Value 42.216 - Time Delta 20.732Time 2023-05-29T22:43:37




Episode 40000 - Step 1977805 - Epsilon 0.6099054633892016 - Mean Reward 680.74 - Mean Length 169.52 - Mean Loss 1.529 - Mean Q Value 42.09 - Time Delta 21.866Time 2023-05-29T22:43:59




MarioNet saved to ./checkpoints_pytorch/ mario_net_1980000.chkpt at step 1980000
Episode 40000 - Step 1982058 - Epsilon 0.6092573259512737 - Mean Reward 681.7 - Mean Length 170.56 - Mean Loss 1.541 - Mean Q Value 41.825 - Time Delta 34.637Time 2023-05-29T22:44:33




Episode 40000 - Step 1985043 - Epsilon 0.6088028372171089 - Mean Reward 675.93 - Mean Length 170.3 - Mean Loss 1.538 - Mean Q Value 41.54 - Time Delta 25.01Time 2023-05-29T22:44:58




Episode 40000 - Step 1988144 - Epsilon 0.6083310456603482 - Mean Reward 689.31 - Mean Length 155.29 - Mean Loss 1.535 - Mean Q Value 41.306 - Time Delta 25.218Time 2023-05-29T22:45:23




MarioNet saved to ./checkpoints_pytorch/ mario_net_1990000.chkpt at step 1990000
Episode 40000 - Step 1992126 - Epsilon 0.6077257533628777 - Mean Reward 722.19 - Mean Length 170.08 - Mean Loss 1.522 - Mean Q Value 41.357 - Time Delta 31.811Time 2023-05-29T22:45:55




Episode 40000 - Step 1995490 - Epsilon 0.6072148707966017 - Mean Reward 737.75 - Mean Length 176.85 - Mean Loss 1.513 - Mean Q Value 41.402 - Time Delta 27.014Time 2023-05-29T22:46:22




Episode 40000 - Step 1998790 - Epsilon 0.6067141250515523 - Mean Reward 713.93 - Mean Length 167.32 - Mean Loss 1.495 - Mean Q Value 41.37 - Time Delta 26.399Time 2023-05-29T22:46:49




MarioNet saved to ./checkpoints_pytorch/ mario_net_2000000.chkpt at step 2000000
Episode 40000 - Step 2002388 - Epsilon 0.6061686310005353 - Mean Reward 728.29 - Mean Length 173.45 - Mean Loss 1.493 - Mean Q Value 41.361 - Time Delta 29.413Time 2023-05-29T22:47:18




Episode 40000 - Step 2005931 - Epsilon 0.6056319547840361 - Mean Reward 731.87 - Mean Length 177.87 - Mean Loss 1.481 - Mean Q Value 41.376 - Time Delta 28.833Time 2023-05-29T22:47:47




MarioNet saved to ./checkpoints_pytorch/ mario_net_2010000.chkpt at step 2010000
Episode 40000 - Step 2010129 - Epsilon 0.6049966773881167 - Mean Reward 730.04 - Mean Length 180.03 - Mean Loss 1.46 - Mean Q Value 41.299 - Time Delta 32.748Time 2023-05-29T22:48:20




Episode 40000 - Step 2014107 - Epsilon 0.604395307198092 - Mean Reward 739.52 - Mean Length 186.17 - Mean Loss 1.458 - Mean Q Value 41.211 - Time Delta 30.486Time 2023-05-29T22:48:50




MarioNet saved to ./checkpoints_pytorch/ mario_net_2020000.chkpt at step 2020000
Episode 40000 - Step 2020303 - Epsilon 0.6034598234693778 - Mean Reward 737.56 - Mean Length 215.13 - Mean Loss 1.428 - Mean Q Value 40.993 - Time Delta 49.202Time 2023-05-29T22:49:39




Episode 40000 - Step 2026795 - Epsilon 0.6024812024192212 - Mean Reward 736.14 - Mean Length 244.07 - Mean Loss 1.446 - Mean Q Value 40.472 - Time Delta 53.809Time 2023-05-29T22:50:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_2030000.chkpt at step 2030000
Episode 40000 - Step 2031253 - Epsilon 0.6018101110702632 - Mean Reward 739.37 - Mean Length 253.22 - Mean Loss 1.451 - Mean Q Value 39.296 - Time Delta 36.691Time 2023-05-29T22:51:10




Episode 40000 - Step 2033847 - Epsilon 0.601419963683376 - Mean Reward 711.71 - Mean Length 237.18 - Mean Loss 1.446 - Mean Q Value 38.114 - Time Delta 21.893Time 2023-05-29T22:51:32




Episode 40000 - Step 2036817 - Epsilon 0.6009735750467705 - Mean Reward 694.95 - Mean Length 227.1 - Mean Loss 1.412 - Mean Q Value 37.007 - Time Delta 25.484Time 2023-05-29T22:51:57




MarioNet saved to ./checkpoints_pytorch/ mario_net_2040000.chkpt at step 2040000
Episode 40000 - Step 2041450 - Epsilon 0.6002779002768444 - Mean Reward 738.07 - Mean Length 211.47 - Mean Loss 1.403 - Mean Q Value 36.17 - Time Delta 39.51Time 2023-05-29T22:52:37




Episode 40000 - Step 2045962 - Epsilon 0.5996011684691407 - Mean Reward 748.04 - Mean Length 191.67 - Mean Loss 1.344 - Mean Q Value 35.61 - Time Delta 36.617Time 2023-05-29T22:53:13




MarioNet saved to ./checkpoints_pytorch/ mario_net_2050000.chkpt at step 2050000
Episode 40000 - Step 2050025 - Epsilon 0.5989924327200994 - Mean Reward 762.08 - Mean Length 187.72 - Mean Loss 1.305 - Mean Q Value 35.578 - Time Delta 30.594Time 2023-05-29T22:53:44




Episode 40000 - Step 2053445 - Epsilon 0.5984805130029208 - Mean Reward 778.49 - Mean Length 195.98 - Mean Loss 1.3 - Mean Q Value 35.556 - Time Delta 28.057Time 2023-05-29T22:54:12




Episode 40000 - Step 2056419 - Epsilon 0.5980357080625689 - Mean Reward 780.28 - Mean Length 196.02 - Mean Loss 1.307 - Mean Q Value 35.411 - Time Delta 23.509Time 2023-05-29T22:54:35




MarioNet saved to ./checkpoints_pytorch/ mario_net_2060000.chkpt at step 2060000
Episode 40000 - Step 2060693 - Epsilon 0.5973970480934331 - Mean Reward 765.42 - Mean Length 192.43 - Mean Loss 1.311 - Mean Q Value 35.571 - Time Delta 34.236Time 2023-05-29T22:55:10




Episode 40000 - Step 2063724 - Epsilon 0.5969445418885538 - Mean Reward 752.71 - Mean Length 177.62 - Mean Loss 1.353 - Mean Q Value 36.062 - Time Delta 23.117Time 2023-05-29T22:55:33




Episode 40000 - Step 2066490 - Epsilon 0.5965318973744779 - Mean Reward 717.79 - Mean Length 164.65 - Mean Loss 1.4 - Mean Q Value 37.156 - Time Delta 19.089Time 2023-05-29T22:55:52




Episode 40000 - Step 2069478 - Epsilon 0.5960864543850657 - Mean Reward 706.7 - Mean Length 160.33 - Mean Loss 1.417 - Mean Q Value 38.5 - Time Delta 24.567Time 2023-05-29T22:56:17




MarioNet saved to ./checkpoints_pytorch/ mario_net_2070000.chkpt at step 2070000
Episode 40000 - Step 2072449 - Epsilon 0.595643875498767 - Mean Reward 710.67 - Mean Length 160.3 - Mean Loss 1.446 - Mean Q Value 39.958 - Time Delta 23.707Time 2023-05-29T22:56:40




Episode 40000 - Step 2075926 - Epsilon 0.5951263369629995 - Mean Reward 689.84 - Mean Length 152.33 - Mean Loss 1.464 - Mean Q Value 41.05 - Time Delta 26.187Time 2023-05-29T22:57:06




Episode 40000 - Step 2079086 - Epsilon 0.5946563727582881 - Mean Reward 698.79 - Mean Length 153.62 - Mean Loss 1.465 - Mean Q Value 41.839 - Time Delta 23.944Time 2023-05-29T22:57:30




MarioNet saved to ./checkpoints_pytorch/ mario_net_2080000.chkpt at step 2080000
Episode 40000 - Step 2082857 - Epsilon 0.5940960245683788 - Mean Reward 716.64 - Mean Length 163.67 - Mean Loss 1.487 - Mean Q Value 42.269 - Time Delta 32.136Time 2023-05-29T22:58:02




Episode 40000 - Step 2086265 - Epsilon 0.5935900702592727 - Mean Reward 739.94 - Mean Length 167.87 - Mean Loss 1.522 - Mean Q Value 42.677 - Time Delta 24.997Time 2023-05-29T22:58:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_2090000.chkpt at step 2090000
Episode 40000 - Step 2090182 - Epsilon 0.5930090816232502 - Mean Reward 753.88 - Mean Length 177.33 - Mean Loss 1.528 - Mean Q Value 42.969 - Time Delta 32.565Time 2023-05-29T22:59:00




Episode 40000 - Step 2093622 - Epsilon 0.5924993129811973 - Mean Reward 766.41 - Mean Length 176.96 - Mean Loss 1.574 - Mean Q Value 43.315 - Time Delta 25.66Time 2023-05-29T22:59:26




Episode 40000 - Step 2096839 - Epsilon 0.5920229869174182 - Mean Reward 745.24 - Mean Length 177.53 - Mean Loss 1.605 - Mean Q Value 43.7 - Time Delta 25.216Time 2023-05-29T22:59:51




MarioNet saved to ./checkpoints_pytorch/ mario_net_2100000.chkpt at step 2100000
Episode 40000 - Step 2100859 - Epsilon 0.5914283026200634 - Mean Reward 728.31 - Mean Length 180.02 - Mean Loss 1.611 - Mean Q Value 43.887 - Time Delta 31.426Time 2023-05-29T23:00:22




Episode 40000 - Step 2106944 - Epsilon 0.5905292761975475 - Mean Reward 733.85 - Mean Length 206.79 - Mean Loss 1.619 - Mean Q Value 43.526 - Time Delta 56.687Time 2023-05-29T23:01:19




MarioNet saved to ./checkpoints_pytorch/ mario_net_2110000.chkpt at step 2110000
Episode 40000 - Step 2110782 - Epsilon 0.5899629350317891 - Mean Reward 720.13 - Mean Length 206.0 - Mean Loss 1.648 - Mean Q Value 42.777 - Time Delta 42.301Time 2023-05-29T23:02:01




Episode 40000 - Step 2114776 - Epsilon 0.5893741509668464 - Mean Reward 727.92 - Mean Length 211.54 - Mean Loss 1.635 - Mean Q Value 41.78 - Time Delta 43.398Time 2023-05-29T23:02:45




Episode 40000 - Step 2119027 - Epsilon 0.5887481262223858 - Mean Reward 725.36 - Mean Length 221.88 - Mean Loss 1.622 - Mean Q Value 40.581 - Time Delta 41.74Time 2023-05-29T23:03:26




MarioNet saved to ./checkpoints_pytorch/ mario_net_2120000.chkpt at step 2120000
Episode 40000 - Step 2122162 - Epsilon 0.5882868755969382 - Mean Reward 719.02 - Mean Length 213.03 - Mean Loss 1.606 - Mean Q Value 39.323 - Time Delta 29.136Time 2023-05-29T23:03:56




Episode 40000 - Step 2126358 - Epsilon 0.587670086149874 - Mean Reward 730.06 - Mean Length 194.14 - Mean Loss 1.579 - Mean Q Value 38.332 - Time Delta 34.404Time 2023-05-29T23:04:30




Episode 40000 - Step 2129131 - Epsilon 0.5872628249948944 - Mean Reward 714.19 - Mean Length 183.49 - Mean Loss 1.556 - Mean Q Value 37.715 - Time Delta 22.612Time 2023-05-29T23:04:53




MarioNet saved to ./checkpoints_pytorch/ mario_net_2130000.chkpt at step 2130000
Episode 40000 - Step 2132571 - Epsilon 0.5867579960098012 - Mean Reward 710.6 - Mean Length 177.95 - Mean Loss 1.53 - Mean Q Value 37.417 - Time Delta 28.4Time 2023-05-29T23:05:21




Episode 40000 - Step 2136906 - Epsilon 0.5861224414054463 - Mean Reward 758.92 - Mean Length 178.79 - Mean Loss 1.504 - Mean Q Value 37.379 - Time Delta 35.538Time 2023-05-29T23:05:57




MarioNet saved to ./checkpoints_pytorch/ mario_net_2140000.chkpt at step 2140000
Episode 40000 - Step 2140101 - Epsilon 0.5856544629705795 - Mean Reward 749.64 - Mean Length 179.39 - Mean Loss 1.476 - Mean Q Value 37.384 - Time Delta 26.225Time 2023-05-29T23:06:23




Episode 40000 - Step 2142854 - Epsilon 0.58525152491298 - Mean Reward 709.35 - Mean Length 164.96 - Mean Loss 1.471 - Mean Q Value 37.554 - Time Delta 22.406Time 2023-05-29T23:06:45




Episode 40000 - Step 2146463 - Epsilon 0.5847237198004632 - Mean Reward 725.94 - Mean Length 173.32 - Mean Loss 1.479 - Mean Q Value 38.072 - Time Delta 29.232Time 2023-05-29T23:07:14




Episode 40000 - Step 2149010 - Epsilon 0.5843515154384435 - Mean Reward 694.44 - Mean Length 164.39 - Mean Loss 1.515 - Mean Q Value 38.713 - Time Delta 20.62Time 2023-05-29T23:07:35




MarioNet saved to ./checkpoints_pytorch/ mario_net_2150000.chkpt at step 2150000
Episode 40000 - Step 2152478 - Epsilon 0.583845102173059 - Mean Reward 679.96 - Mean Length 155.72 - Mean Loss 1.562 - Mean Q Value 39.473 - Time Delta 28.073Time 2023-05-29T23:08:03




Episode 40000 - Step 2155172 - Epsilon 0.5834520148345764 - Mean Reward 693.39 - Mean Length 150.71 - Mean Loss 1.607 - Mean Q Value 40.426 - Time Delta 21.834Time 2023-05-29T23:08:25




Episode 40000 - Step 2158879 - Epsilon 0.582911551088266 - Mean Reward 714.72 - Mean Length 160.25 - Mean Loss 1.656 - Mean Q Value 41.512 - Time Delta 29.521Time 2023-05-29T23:08:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_2160000.chkpt at step 2160000
Episode 40000 - Step 2161976 - Epsilon 0.5824604064352925 - Mean Reward 733.01 - Mean Length 155.13 - Mean Loss 1.678 - Mean Q Value 42.379 - Time Delta 25.633Time 2023-05-29T23:09:20




Episode 40000 - Step 2165002 - Epsilon 0.5820199417094939 - Mean Reward 740.17 - Mean Length 159.92 - Mean Loss 1.664 - Mean Q Value 42.99 - Time Delta 25.55Time 2023-05-29T23:09:46




Episode 40000 - Step 2168233 - Epsilon 0.5815500048640624 - Mean Reward 723.81 - Mean Length 157.55 - Mean Loss 1.657 - Mean Q Value 43.615 - Time Delta 26.914Time 2023-05-29T23:10:13




MarioNet saved to ./checkpoints_pytorch/ mario_net_2170000.chkpt at step 2170000
Episode 40000 - Step 2171206 - Epsilon 0.5811179283591998 - Mean Reward 700.64 - Mean Length 160.34 - Mean Loss 1.668 - Mean Q Value 44.09 - Time Delta 24.628Time 2023-05-29T23:10:37




Episode 40000 - Step 2174805 - Epsilon 0.5805953025892772 - Mean Reward 699.92 - Mean Length 159.26 - Mean Loss 1.66 - Mean Q Value 44.184 - Time Delta 28.893Time 2023-05-29T23:11:06




Episode 40000 - Step 2178652 - Epsilon 0.5800371834157463 - Mean Reward 683.65 - Mean Length 166.76 - Mean Loss 1.663 - Mean Q Value 44.191 - Time Delta 30.695Time 2023-05-29T23:11:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_2180000.chkpt at step 2180000
Episode 40000 - Step 2182144 - Epsilon 0.5795310318585867 - Mean Reward 683.25 - Mean Length 171.42 - Mean Loss 1.665 - Mean Q Value 44.236 - Time Delta 28.537Time 2023-05-29T23:12:05




Episode 40000 - Step 2184692 - Epsilon 0.5791619880980268 - Mean Reward 668.55 - Mean Length 164.59 - Mean Loss 1.664 - Mean Q Value 44.17 - Time Delta 20.721Time 2023-05-29T23:12:26




Episode 40000 - Step 2187679 - Epsilon 0.578729660269803 - Mean Reward 669.2 - Mean Length 164.73 - Mean Loss 1.657 - Mean Q Value 44.146 - Time Delta 24.344Time 2023-05-29T23:12:50




MarioNet saved to ./checkpoints_pytorch/ mario_net_2190000.chkpt at step 2190000
Episode 40000 - Step 2191193 - Epsilon 0.5782214694551601 - Mean Reward 659.67 - Mean Length 163.88 - Mean Loss 1.673 - Mean Q Value 44.085 - Time Delta 28.483Time 2023-05-29T23:13:19




Episode 40000 - Step 2194924 - Epsilon 0.5776823847667714 - Mean Reward 672.92 - Mean Length 162.72 - Mean Loss 1.687 - Mean Q Value 43.901 - Time Delta 30.11Time 2023-05-29T23:13:49




Episode 40000 - Step 2199801 - Epsilon 0.5769784746394669 - Mean Reward 727.15 - Mean Length 176.57 - Mean Loss 1.732 - Mean Q Value 43.481 - Time Delta 39.238Time 2023-05-29T23:14:28




MarioNet saved to ./checkpoints_pytorch/ mario_net_2200000.chkpt at step 2200000
Episode 40000 - Step 2203896 - Epsilon 0.5763880951042634 - Mean Reward 728.33 - Mean Length 192.04 - Mean Loss 1.785 - Mean Q Value 42.91 - Time Delta 33.112Time 2023-05-29T23:15:01




Episode 40000 - Step 2207792 - Epsilon 0.5758269663434784 - Mean Reward 750.64 - Mean Length 201.13 - Mean Loss 1.805 - Mean Q Value 42.265 - Time Delta 31.541Time 2023-05-29T23:15:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_2210000.chkpt at step 2210000
Episode 40000 - Step 2212992 - Epsilon 0.5750788775566879 - Mean Reward 788.61 - Mean Length 217.99 - Mean Loss 1.805 - Mean Q Value 41.466 - Time Delta 42.781Time 2023-05-29T23:16:16




Episode 40000 - Step 2216719 - Epsilon 0.5745432972977019 - Mean Reward 793.42 - Mean Length 217.95 - Mean Loss 1.801 - Mean Q Value 40.649 - Time Delta 30.628Time 2023-05-29T23:16:46




Episode 40000 - Step 2219995 - Epsilon 0.574072938916404 - Mean Reward 739.51 - Mean Length 201.94 - Mean Loss 1.769 - Mean Q Value 40.162 - Time Delta 26.641Time 2023-05-29T23:17:13




MarioNet saved to ./checkpoints_pytorch/ mario_net_2220000.chkpt at step 2220000
Episode 40000 - Step 2222923 - Epsilon 0.5736528712360777 - Mean Reward 749.86 - Mean Length 190.27 - Mean Loss 1.721 - Mean Q Value 39.855 - Time Delta 23.798Time 2023-05-29T23:17:37




Episode 40000 - Step 2226293 - Epsilon 0.5731697721656651 - Mean Reward 756.06 - Mean Length 185.01 - Mean Loss 1.7 - Mean Q Value 39.522 - Time Delta 27.343Time 2023-05-29T23:18:04




Episode 40000 - Step 2229897 - Epsilon 0.5726535787164175 - Mean Reward 719.94 - Mean Length 169.05 - Mean Loss 1.664 - Mean Q Value 39.549 - Time Delta 29.128Time 2023-05-29T23:18:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_2230000.chkpt at step 2230000
Episode 40000 - Step 2233849 - Epsilon 0.5720880763140671 - Mean Reward 714.47 - Mean Length 171.3 - Mean Loss 1.636 - Mean Q Value 39.6 - Time Delta 31.962Time 2023-05-29T23:19:05




Episode 40000 - Step 2238370 - Epsilon 0.5714418389586213 - Mean Reward 724.77 - Mean Length 183.75 - Mean Loss 1.611 - Mean Q Value 39.481 - Time Delta 36.551Time 2023-05-29T23:19:42




MarioNet saved to ./checkpoints_pytorch/ mario_net_2240000.chkpt at step 2240000
Episode 40000 - Step 2242325 - Epsilon 0.5708771050060353 - Mean Reward 727.63 - Mean Length 194.02 - Mean Loss 1.608 - Mean Q Value 39.231 - Time Delta 32.076Time 2023-05-29T23:20:14




Episode 40000 - Step 2245249 - Epsilon 0.5704599462801639 - Mean Reward 717.78 - Mean Length 189.56 - Mean Loss 1.595 - Mean Q Value 39.038 - Time Delta 22.503Time 2023-05-29T23:20:36




Episode 40000 - Step 2247928 - Epsilon 0.5700780085990086 - Mean Reward 701.44 - Mean Length 180.31 - Mean Loss 1.607 - Mean Q Value 38.866 - Time Delta 21.168Time 2023-05-29T23:20:57




MarioNet saved to ./checkpoints_pytorch/ mario_net_2250000.chkpt at step 2250000
Episode 40000 - Step 2252981 - Epsilon 0.5693583121385668 - Mean Reward 675.24 - Mean Length 191.32 - Mean Loss 1.584 - Mean Q Value 38.853 - Time Delta 40.594Time 2023-05-29T23:21:38




Episode 40000 - Step 2256234 - Epsilon 0.5688954696621317 - Mean Reward 666.0 - Mean Length 178.64 - Mean Loss 1.583 - Mean Q Value 38.827 - Time Delta 27.533Time 2023-05-29T23:22:06




MarioNet saved to ./checkpoints_pytorch/ mario_net_2260000.chkpt at step 2260000
Episode 40000 - Step 2260333 - Epsilon 0.5683127925568504 - Mean Reward 691.32 - Mean Length 180.08 - Mean Loss 1.577 - Mean Q Value 38.862 - Time Delta 34.147Time 2023-05-29T23:22:40




Episode 40000 - Step 2263183 - Epsilon 0.5679080138610351 - Mean Reward 696.47 - Mean Length 179.34 - Mean Loss 1.577 - Mean Q Value 38.924 - Time Delta 23.909Time 2023-05-29T23:23:04




Episode 40000 - Step 2266469 - Epsilon 0.5674416689466881 - Mean Reward 719.99 - Mean Length 185.41 - Mean Loss 1.566 - Mean Q Value 38.894 - Time Delta 27.602Time 2023-05-29T23:23:31




MarioNet saved to ./checkpoints_pytorch/ mario_net_2270000.chkpt at step 2270000
Episode 40000 - Step 2270506 - Epsilon 0.566869267267458 - Mean Reward 760.77 - Mean Length 175.25 - Mean Loss 1.571 - Mean Q Value 38.876 - Time Delta 32.106Time 2023-05-29T23:24:03




Episode 40000 - Step 2272978 - Epsilon 0.5665190502446648 - Mean Reward 743.77 - Mean Length 167.44 - Mean Loss 1.58 - Mean Q Value 38.974 - Time Delta 20.691Time 2023-05-29T23:24:24




Episode 40000 - Step 2275563 - Epsilon 0.5661530555374212 - Mean Reward 693.59 - Mean Length 152.3 - Mean Loss 1.568 - Mean Q Value 39.153 - Time Delta 21.7Time 2023-05-29T23:24:46




Episode 40000 - Step 2278388 - Epsilon 0.5657533510541881 - Mean Reward 681.03 - Mean Length 152.05 - Mean Loss 1.556 - Mean Q Value 39.561 - Time Delta 23.7Time 2023-05-29T23:25:09




MarioNet saved to ./checkpoints_pytorch/ mario_net_2280000.chkpt at step 2280000
Episode 40000 - Step 2281399 - Epsilon 0.5653276404128541 - Mean Reward 670.04 - Mean Length 149.3 - Mean Loss 1.57 - Mean Q Value 40.098 - Time Delta 24.821Time 2023-05-29T23:25:34




Episode 40000 - Step 2286380 - Epsilon 0.5646241042107291 - Mean Reward 660.76 - Mean Length 158.74 - Mean Loss 1.615 - Mean Q Value 40.639 - Time Delta 41.929Time 2023-05-29T23:26:16




Episode 40000 - Step 2289828 - Epsilon 0.564137607882059 - Mean Reward 684.2 - Mean Length 168.5 - Mean Loss 1.643 - Mean Q Value 41.024 - Time Delta 29.853Time 2023-05-29T23:26:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_2290000.chkpt at step 2290000
Episode 40000 - Step 2294738 - Epsilon 0.5634455537169212 - Mean Reward 728.61 - Mean Length 191.75 - Mean Loss 1.661 - Mean Q Value 41.557 - Time Delta 45.279Time 2023-05-29T23:27:31




Episode 40000 - Step 2297796 - Epsilon 0.5630149641510575 - Mean Reward 732.34 - Mean Length 194.08 - Mean Loss 1.691 - Mean Q Value 41.753 - Time Delta 24.439Time 2023-05-29T23:27:56




MarioNet saved to ./checkpoints_pytorch/ mario_net_2300000.chkpt at step 2300000
Episode 40000 - Step 2301450 - Epsilon 0.5625008847584863 - Mean Reward 734.12 - Mean Length 200.51 - Mean Loss 1.691 - Mean Q Value 41.838 - Time Delta 30.536Time 2023-05-29T23:28:26




Episode 40000 - Step 2304746 - Epsilon 0.5620375748813512 - Mean Reward 710.38 - Mean Length 183.66 - Mean Loss 1.666 - Mean Q Value 41.809 - Time Delta 27.224Time 2023-05-29T23:28:54




Episode 40000 - Step 2308359 - Epsilon 0.5615301435815006 - Mean Reward 726.79 - Mean Length 185.31 - Mean Loss 1.623 - Mean Q Value 41.828 - Time Delta 31.24Time 2023-05-29T23:29:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_2310000.chkpt at step 2310000
Episode 40000 - Step 2313040 - Epsilon 0.5608733972024605 - Mean Reward 710.02 - Mean Length 183.02 - Mean Loss 1.645 - Mean Q Value 41.468 - Time Delta 40.59Time 2023-05-29T23:30:05




Episode 40000 - Step 2316385 - Epsilon 0.560404562824076 - Mean Reward 720.03 - Mean Length 185.89 - Mean Loss 1.679 - Mean Q Value 40.954 - Time Delta 29.24Time 2023-05-29T23:30:35




MarioNet saved to ./checkpoints_pytorch/ mario_net_2320000.chkpt at step 2320000
Episode 40000 - Step 2320065 - Epsilon 0.5598892276523174 - Mean Reward 728.88 - Mean Length 186.15 - Mean Loss 1.706 - Mean Q Value 40.401 - Time Delta 31.974Time 2023-05-29T23:31:07




Episode 40000 - Step 2323807 - Epsilon 0.559365696134383 - Mean Reward 715.33 - Mean Length 190.61 - Mean Loss 1.753 - Mean Q Value 39.631 - Time Delta 31.976Time 2023-05-29T23:31:39




Episode 40000 - Step 2326547 - Epsilon 0.5589826617888394 - Mean Reward 685.98 - Mean Length 181.88 - Mean Loss 1.812 - Mean Q Value 39.088 - Time Delta 23.69Time 2023-05-29T23:32:02




MarioNet saved to ./checkpoints_pytorch/ mario_net_2330000.chkpt at step 2330000
Episode 40000 - Step 2330013 - Epsilon 0.5584985130395506 - Mean Reward 697.37 - Mean Length 169.73 - Mean Loss 1.82 - Mean Q Value 38.786 - Time Delta 29.587Time 2023-05-29T23:32:32




Episode 40000 - Step 2332559 - Epsilon 0.5581431418004238 - Mean Reward 671.08 - Mean Length 161.74 - Mean Loss 1.784 - Mean Q Value 38.634 - Time Delta 21.703Time 2023-05-29T23:32:54




Episode 40000 - Step 2335928 - Epsilon 0.5576732435941001 - Mean Reward 670.23 - Mean Length 158.63 - Mean Loss 1.748 - Mean Q Value 38.916 - Time Delta 28.526Time 2023-05-29T23:33:22




Episode 40000 - Step 2338955 - Epsilon 0.5572513839554933 - Mean Reward 691.48 - Mean Length 151.48 - Mean Loss 1.693 - Mean Q Value 39.492 - Time Delta 25.937Time 2023-05-29T23:33:48




MarioNet saved to ./checkpoints_pytorch/ mario_net_2340000.chkpt at step 2340000
Episode 40000 - Step 2341945 - Epsilon 0.5568349941389594 - Mean Reward 687.21 - Mean Length 153.98 - Mean Loss 1.654 - Mean Q Value 40.0 - Time Delta 25.973Time 2023-05-29T23:34:14




Episode 40000 - Step 2346471 - Epsilon 0.5562052915856941 - Mean Reward 683.46 - Mean Length 164.58 - Mean Loss 1.646 - Mean Q Value 40.276 - Time Delta 38.999Time 2023-05-29T23:34:53




Episode 40000 - Step 2349422 - Epsilon 0.5557951024076264 - Mean Reward 692.12 - Mean Length 168.63 - Mean Loss 1.648 - Mean Q Value 40.51 - Time Delta 25.289Time 2023-05-29T23:35:18




MarioNet saved to ./checkpoints_pytorch/ mario_net_2350000.chkpt at step 2350000
Episode 40000 - Step 2352848 - Epsilon 0.5553192676481952 - Mean Reward 669.77 - Mean Length 169.2 - Mean Loss 1.677 - Mean Q Value 40.557 - Time Delta 29.42Time 2023-05-29T23:35:48




Episode 40000 - Step 2357256 - Epsilon 0.5547076428058912 - Mean Reward 671.28 - Mean Length 183.01 - Mean Loss 1.694 - Mean Q Value 40.515 - Time Delta 37.645Time 2023-05-29T23:36:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_2360000.chkpt at step 2360000
Episode 40000 - Step 2360855 - Epsilon 0.5542087690063479 - Mean Reward 700.48 - Mean Length 189.1 - Mean Loss 1.723 - Mean Q Value 40.315 - Time Delta 30.837Time 2023-05-29T23:36:56




Episode 40000 - Step 2364679 - Epsilon 0.553679198531927 - Mean Reward 715.22 - Mean Length 182.08 - Mean Loss 1.726 - Mean Q Value 40.476 - Time Delta 32.743Time 2023-05-29T23:37:29




Episode 40000 - Step 2368123 - Epsilon 0.5532026858505081 - Mean Reward 731.68 - Mean Length 187.01 - Mean Loss 1.757 - Mean Q Value 40.767 - Time Delta 29.675Time 2023-05-29T23:37:59




MarioNet saved to ./checkpoints_pytorch/ mario_net_2370000.chkpt at step 2370000
Episode 40000 - Step 2371851 - Epsilon 0.5526873410707752 - Mean Reward 760.8 - Mean Length 190.03 - Mean Loss 1.782 - Mean Q Value 40.748 - Time Delta 32.125Time 2023-05-29T23:38:31




Episode 40000 - Step 2375074 - Epsilon 0.5522421925532968 - Mean Reward 750.23 - Mean Length 178.18 - Mean Loss 1.838 - Mean Q Value 40.71 - Time Delta 27.673Time 2023-05-29T23:38:58




Episode 40000 - Step 2378408 - Epsilon 0.5517820904024354 - Mean Reward 738.93 - Mean Length 175.53 - Mean Loss 1.866 - Mean Q Value 40.779 - Time Delta 28.368Time 2023-05-29T23:39:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_2380000.chkpt at step 2380000
Episode 40000 - Step 2382054 - Epsilon 0.551279370113702 - Mean Reward 722.92 - Mean Length 173.75 - Mean Loss 1.927 - Mean Q Value 40.43 - Time Delta 31.527Time 2023-05-29T23:39:58




Episode 40000 - Step 2385703 - Epsilon 0.5507766947630655 - Mean Reward 746.65 - Mean Length 175.8 - Mean Loss 1.93 - Mean Q Value 40.276 - Time Delta 31.662Time 2023-05-29T23:40:30




Episode 40000 - Step 2388857 - Epsilon 0.5503425784577645 - Mean Reward 724.98 - Mean Length 170.06 - Mean Loss 1.93 - Mean Q Value 40.23 - Time Delta 27.34Time 2023-05-29T23:40:57




MarioNet saved to ./checkpoints_pytorch/ mario_net_2390000.chkpt at step 2390000
Episode 40000 - Step 2392291 - Epsilon 0.549870312044424 - Mean Reward 725.51 - Mean Length 172.17 - Mean Loss 1.919 - Mean Q Value 40.252 - Time Delta 29.932Time 2023-05-29T23:41:27




Episode 40000 - Step 2396824 - Epsilon 0.5492475243890703 - Mean Reward 751.49 - Mean Length 184.16 - Mean Loss 1.904 - Mean Q Value 40.259 - Time Delta 39.831Time 2023-05-29T23:42:07




Episode 40000 - Step 2399006 - Epsilon 0.5489479915320469 - Mean Reward 704.19 - Mean Length 169.52 - Mean Loss 1.853 - Mean Q Value 40.469 - Time Delta 19.337Time 2023-05-29T23:42:26




MarioNet saved to ./checkpoints_pytorch/ mario_net_2400000.chkpt at step 2400000
Episode 40000 - Step 2401954 - Epsilon 0.5485435658608471 - Mean Reward 663.07 - Mean Length 162.51 - Mean Loss 1.838 - Mean Q Value 40.616 - Time Delta 25.372Time 2023-05-29T23:42:52




Episode 40000 - Step 2404560 - Epsilon 0.5481863060729933 - Mean Reward 642.1 - Mean Length 157.03 - Mean Loss 1.815 - Mean Q Value 40.718 - Time Delta 22.433Time 2023-05-29T23:43:14




Episode 40000 - Step 2408224 - Epsilon 0.5476843972631512 - Mean Reward 659.77 - Mean Length 159.33 - Mean Loss 1.791 - Mean Q Value 40.885 - Time Delta 31.005Time 2023-05-29T23:43:45




MarioNet saved to ./checkpoints_pytorch/ mario_net_2410000.chkpt at step 2410000
Episode 40000 - Step 2410717 - Epsilon 0.54734315926919 - Mean Reward 602.0 - Mean Length 138.93 - Mean Loss 1.782 - Mean Q Value 41.071 - Time Delta 22.332Time 2023-05-29T23:44:07




Episode 40000 - Step 2413598 - Epsilon 0.546949077245235 - Mean Reward 623.85 - Mean Length 145.92 - Mean Loss 1.76 - Mean Q Value 41.28 - Time Delta 23.384Time 2023-05-29T23:44:31




Episode 40000 - Step 2417877 - Epsilon 0.546364291239854 - Mean Reward 650.47 - Mean Length 159.23 - Mean Loss 1.744 - Mean Q Value 41.247 - Time Delta 36.513Time 2023-05-29T23:45:07




MarioNet saved to ./checkpoints_pytorch/ mario_net_2420000.chkpt at step 2420000
Episode 40000 - Step 2421122 - Epsilon 0.5459212328929813 - Mean Reward 667.88 - Mean Length 165.62 - Mean Loss 1.723 - Mean Q Value 41.236 - Time Delta 27.189Time 2023-05-29T23:45:35




Episode 40000 - Step 2424828 - Epsilon 0.5454156710451294 - Mean Reward 666.86 - Mean Length 166.04 - Mean Loss 1.698 - Mean Q Value 41.173 - Time Delta 31.97Time 2023-05-29T23:46:07




Episode 40000 - Step 2427992 - Epsilon 0.5449844177785225 - Mean Reward 688.18 - Mean Length 172.75 - Mean Loss 1.659 - Mean Q Value 40.976 - Time Delta 26.456Time 2023-05-29T23:46:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_2430000.chkpt at step 2430000
Episode 40000 - Step 2432147 - Epsilon 0.5444186090615278 - Mean Reward 698.6 - Mean Length 185.49 - Mean Loss 1.67 - Mean Q Value 40.53 - Time Delta 36.388Time 2023-05-29T23:47:09




Episode 40000 - Step 2436266 - Epsilon 0.5438582824763922 - Mean Reward 698.57 - Mean Length 183.89 - Mean Loss 1.66 - Mean Q Value 40.229 - Time Delta 36.278Time 2023-05-29T23:47:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_2440000.chkpt at step 2440000
Episode 40000 - Step 2440284 - Epsilon 0.5433122510535112 - Mean Reward 705.17 - Mean Length 191.62 - Mean Loss 1.678 - Mean Q Value 39.788 - Time Delta 33.536Time 2023-05-29T23:48:19




Episode 40000 - Step 2444301 - Epsilon 0.5427669035356015 - Mean Reward 695.09 - Mean Length 194.73 - Mean Loss 1.699 - Mean Q Value 39.171 - Time Delta 35.041Time 2023-05-29T23:48:54




Episode 40000 - Step 2447558 - Epsilon 0.5423251354083714 - Mean Reward 712.13 - Mean Length 195.66 - Mean Loss 1.724 - Mean Q Value 38.566 - Time Delta 28.513Time 2023-05-29T23:49:23




MarioNet saved to ./checkpoints_pytorch/ mario_net_2450000.chkpt at step 2450000
Episode 40000 - Step 2451695 - Epsilon 0.5417645255222755 - Mean Reward 724.91 - Mean Length 195.48 - Mean Loss 1.693 - Mean Q Value 38.161 - Time Delta 36.248Time 2023-05-29T23:49:59




Episode 40000 - Step 2454433 - Epsilon 0.5413938145484712 - Mean Reward 707.31 - Mean Length 181.67 - Mean Loss 1.675 - Mean Q Value 37.799 - Time Delta 23.653Time 2023-05-29T23:50:23




MarioNet saved to ./checkpoints_pytorch/ mario_net_2460000.chkpt at step 2460000
Episode 40000 - Step 2460153 - Epsilon 0.5406201745812672 - Mean Reward 705.18 - Mean Length 198.69 - Mean Loss 1.668 - Mean Q Value 37.413 - Time Delta 48.206Time 2023-05-29T23:51:11




Episode 40000 - Step 2463053 - Epsilon 0.540228366953091 - Mean Reward 696.28 - Mean Length 187.52 - Mean Loss 1.659 - Mean Q Value 36.992 - Time Delta 24.677Time 2023-05-29T23:51:36




Episode 40000 - Step 2466004 - Epsilon 0.5398299604058029 - Mean Reward 687.79 - Mean Length 184.46 - Mean Loss 1.654 - Mean Q Value 36.706 - Time Delta 25.57Time 2023-05-29T23:52:01




MarioNet saved to ./checkpoints_pytorch/ mario_net_2470000.chkpt at step 2470000
Episode 40000 - Step 2470395 - Epsilon 0.5392376871361176 - Mean Reward 666.51 - Mean Length 187.0 - Mean Loss 1.651 - Mean Q Value 36.552 - Time Delta 36.104Time 2023-05-29T23:52:37




Episode 40000 - Step 2473295 - Epsilon 0.5388468814482197 - Mean Reward 680.98 - Mean Length 188.62 - Mean Loss 1.661 - Mean Q Value 36.391 - Time Delta 23.776Time 2023-05-29T23:53:01




Episode 40000 - Step 2476652 - Epsilon 0.5383948438592013 - Mean Reward 686.03 - Mean Length 164.99 - Mean Loss 1.64 - Mean Q Value 36.426 - Time Delta 29.333Time 2023-05-29T23:53:30




Episode 40000 - Step 2479798 - Epsilon 0.5379715627386501 - Mean Reward 687.01 - Mean Length 167.45 - Mean Loss 1.588 - Mean Q Value 36.78 - Time Delta 26.484Time 2023-05-29T23:53:57




MarioNet saved to ./checkpoints_pytorch/ mario_net_2480000.chkpt at step 2480000
Episode 40000 - Step 2482791 - Epsilon 0.5375691760281781 - Mean Reward 675.24 - Mean Length 167.87 - Mean Loss 1.55 - Mean Q Value 37.244 - Time Delta 26.305Time 2023-05-29T23:54:23




Episode 40000 - Step 2486389 - Epsilon 0.5370858499023689 - Mean Reward 702.92 - Mean Length 159.94 - Mean Loss 1.53 - Mean Q Value 37.68 - Time Delta 30.612Time 2023-05-29T23:54:54




Episode 40000 - Step 2488908 - Epsilon 0.536747726523854 - Mean Reward 666.42 - Mean Length 156.13 - Mean Loss 1.491 - Mean Q Value 38.001 - Time Delta 19.786Time 2023-05-29T23:55:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_2490000.chkpt at step 2490000
Episode 40000 - Step 2491365 - Epsilon 0.5364181304291528 - Mean Reward 633.6 - Mean Length 147.13 - Mean Loss 1.491 - Mean Q Value 38.39 - Time Delta 22.353Time 2023-05-29T23:55:36




Episode 40000 - Step 2493816 - Epsilon 0.5360895408605223 - Mean Reward 617.71 - Mean Length 140.18 - Mean Loss 1.507 - Mean Q Value 38.809 - Time Delta 20.71Time 2023-05-29T23:55:57




Episode 40000 - Step 2497569 - Epsilon 0.5355867906754581 - Mean Reward 631.01 - Mean Length 147.78 - Mean Loss 1.504 - Mean Q Value 39.07 - Time Delta 37.878Time 2023-05-29T23:56:34




MarioNet saved to ./checkpoints_pytorch/ mario_net_2500000.chkpt at step 2500000
Episode 40000 - Step 2501597 - Epsilon 0.5350477261743196 - Mean Reward 625.32 - Mean Length 152.08 - Mean Loss 1.54 - Mean Q Value 39.652 - Time Delta 40.717Time 2023-05-29T23:57:15




Episode 40000 - Step 2505325 - Epsilon 0.5345492939366522 - Mean Reward 670.46 - Mean Length 164.17 - Mean Loss 1.574 - Mean Q Value 40.171 - Time Delta 37.883Time 2023-05-29T23:57:53




Episode 40000 - Step 2508798 - Epsilon 0.5340853728830085 - Mean Reward 706.27 - Mean Length 174.33 - Mean Loss 1.571 - Mean Q Value 40.512 - Time Delta 35.987Time 2023-05-29T23:58:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_2510000.chkpt at step 2510000
Episode 40000 - Step 2512219 - Epsilon 0.5336287915841236 - Mean Reward 746.29 - Mean Length 184.03 - Mean Loss 1.606 - Mean Q Value 40.959 - Time Delta 35.476Time 2023-05-29T23:59:05




Episode 40000 - Step 2515143 - Epsilon 0.5332388514291695 - Mean Reward 742.55 - Mean Length 175.74 - Mean Loss 1.641 - Mean Q Value 41.416 - Time Delta 29.599Time 2023-05-29T23:59:34




Episode 40000 - Step 2519978 - Epsilon 0.5325946882814293 - Mean Reward 747.99 - Mean Length 183.81 - Mean Loss 1.618 - Mean Q Value 41.39 - Time Delta 51.012Time 2023-05-30T00:00:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_2520000.chkpt at step 2520000
Episode 40000 - Step 2524017 - Epsilon 0.5320571721517994 - Mean Reward 734.69 - Mean Length 186.92 - Mean Loss 1.616 - Mean Q Value 41.255 - Time Delta 40.833Time 2023-05-30T00:01:06




Episode 40000 - Step 2527412 - Epsilon 0.5316057801570221 - Mean Reward 727.09 - Mean Length 186.14 - Mean Loss 1.614 - Mean Q Value 41.211 - Time Delta 34.456Time 2023-05-30T00:01:40




MarioNet saved to ./checkpoints_pytorch/ mario_net_2530000.chkpt at step 2530000
Episode 40000 - Step 2531482 - Epsilon 0.531065146302169 - Mean Reward 693.43 - Mean Length 192.63 - Mean Loss 1.577 - Mean Q Value 40.723 - Time Delta 41.587Time 2023-05-30T00:02:22




Episode 40000 - Step 2534811 - Epsilon 0.5306233511463841 - Mean Reward 676.69 - Mean Length 196.68 - Mean Loss 1.545 - Mean Q Value 39.89 - Time Delta 33.511Time 2023-05-30T00:02:56




Episode 40000 - Step 2538343 - Epsilon 0.530155017468461 - Mean Reward 659.36 - Mean Length 183.65 - Mean Loss 1.526 - Mean Q Value 39.098 - Time Delta 35.536Time 2023-05-30T00:03:31




MarioNet saved to ./checkpoints_pytorch/ mario_net_2540000.chkpt at step 2540000
Episode 40000 - Step 2541841 - Epsilon 0.5296915995067298 - Mean Reward 662.14 - Mean Length 178.24 - Mean Loss 1.509 - Mean Q Value 38.676 - Time Delta 36.063Time 2023-05-30T00:04:07




Episode 40000 - Step 2544911 - Epsilon 0.5292852171224454 - Mean Reward 640.44 - Mean Length 174.99 - Mean Loss 1.482 - Mean Q Value 38.299 - Time Delta 31.691Time 2023-05-30T00:04:39




Episode 40000 - Step 2548395 - Epsilon 0.5288244103511478 - Mean Reward 669.08 - Mean Length 169.13 - Mean Loss 1.467 - Mean Q Value 38.031 - Time Delta 35.824Time 2023-05-30T00:05:15




MarioNet saved to ./checkpoints_pytorch/ mario_net_2550000.chkpt at step 2550000
Episode 40000 - Step 2551377 - Epsilon 0.5284303186198596 - Mean Reward 670.69 - Mean Length 165.66 - Mean Loss 1.468 - Mean Q Value 38.137 - Time Delta 30.298Time 2023-05-30T00:05:45




Episode 40000 - Step 2555613 - Epsilon 0.5278710070501973 - Mean Reward 648.52 - Mean Length 172.7 - Mean Loss 1.462 - Mean Q Value 37.95 - Time Delta 42.511Time 2023-05-30T00:06:27




Episode 40000 - Step 2559223 - Epsilon 0.5273948183192346 - Mean Reward 659.18 - Mean Length 173.82 - Mean Loss 1.434 - Mean Q Value 37.64 - Time Delta 36.373Time 2023-05-30T00:07:04




MarioNet saved to ./checkpoints_pytorch/ mario_net_2560000.chkpt at step 2560000
Episode 40000 - Step 2563433 - Epsilon 0.5268400272135598 - Mean Reward 681.53 - Mean Length 185.22 - Mean Loss 1.461 - Mean Q Value 37.485 - Time Delta 42.658Time 2023-05-30T00:07:46




Episode 40000 - Step 2566887 - Epsilon 0.526385297151114 - Mean Reward 665.08 - Mean Length 184.92 - Mean Loss 1.475 - Mean Q Value 37.304 - Time Delta 35.223Time 2023-05-30T00:08:22




MarioNet saved to ./checkpoints_pytorch/ mario_net_2570000.chkpt at step 2570000
Episode 40000 - Step 2571137 - Epsilon 0.5258263097175988 - Mean Reward 720.72 - Mean Length 197.6 - Mean Loss 1.481 - Mean Q Value 37.167 - Time Delta 42.864Time 2023-05-30T00:09:05




Episode 40000 - Step 2576122 - Epsilon 0.5251714067683246 - Mean Reward 733.33 - Mean Length 205.09 - Mean Loss 1.529 - Mean Q Value 37.329 - Time Delta 50.483Time 2023-05-30T00:09:55




MarioNet saved to ./checkpoints_pytorch/ mario_net_2580000.chkpt at step 2580000
Episode 40000 - Step 2580143 - Epsilon 0.524643743406873 - Mean Reward 729.08 - Mean Length 209.2 - Mean Loss 1.583 - Mean Q Value 37.366 - Time Delta 40.751Time 2023-05-30T00:10:36




Episode 40000 - Step 2583003 - Epsilon 0.5242687571569709 - Mean Reward 713.01 - Mean Length 195.7 - Mean Loss 1.628 - Mean Q Value 37.018 - Time Delta 27.898Time 2023-05-30T00:11:04




Episode 40000 - Step 2585915 - Epsilon 0.523887228347353 - Mean Reward 687.52 - Mean Length 190.28 - Mean Loss 1.669 - Mean Q Value 36.761 - Time Delta 31.074Time 2023-05-30T00:11:35




MarioNet saved to ./checkpoints_pytorch/ mario_net_2590000.chkpt at step 2590000
Episode 40000 - Step 2592219 - Epsilon 0.5230622322399309 - Mean Reward 687.28 - Mean Length 210.82 - Mean Loss 1.695 - Mean Q Value 36.181 - Time Delta 66.626Time 2023-05-30T00:12:41




Episode 40000 - Step 2596040 - Epsilon 0.5225628155519585 - Mean Reward 709.17 - Mean Length 199.18 - Mean Loss 1.692 - Mean Q Value 35.747 - Time Delta 40.378Time 2023-05-30T00:13:22




Episode 40000 - Step 2599764 - Epsilon 0.5220765359081251 - Mean Reward 711.52 - Mean Length 196.21 - Mean Loss 1.671 - Mean Q Value 35.41 - Time Delta 39.704Time 2023-05-30T00:14:01




MarioNet saved to ./checkpoints_pytorch/ mario_net_2600000.chkpt at step 2600000
Episode 40000 - Step 2602460 - Epsilon 0.5217247748357023 - Mean Reward 719.57 - Mean Length 194.57 - Mean Loss 1.649 - Mean Q Value 35.367 - Time Delta 28.453Time 2023-05-30T00:14:30




Episode 40000 - Step 2605140 - Epsilon 0.5213753362678257 - Mean Reward 739.93 - Mean Length 192.25 - Mean Loss 1.627 - Mean Q Value 35.498 - Time Delta 28.164Time 2023-05-30T00:14:58




Episode 40000 - Step 2608155 - Epsilon 0.5209824976286164 - Mean Reward 710.31 - Mean Length 159.36 - Mean Loss 1.611 - Mean Q Value 36.083 - Time Delta 31.278Time 2023-05-30T00:15:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_2610000.chkpt at step 2610000
Episode 40000 - Step 2612008 - Epsilon 0.5204809027944024 - Mean Reward 724.71 - Mean Length 159.68 - Mean Loss 1.602 - Mean Q Value 36.552 - Time Delta 40.781Time 2023-05-30T00:16:10




Episode 40000 - Step 2615765 - Epsilon 0.5199922705551421 - Mean Reward 739.2 - Mean Length 160.01 - Mean Loss 1.588 - Mean Q Value 37.221 - Time Delta 40.104Time 2023-05-30T00:16:50




Episode 40000 - Step 2619162 - Epsilon 0.5195508545269455 - Mean Reward 751.97 - Mean Length 167.02 - Mean Loss 1.526 - Mean Q Value 37.998 - Time Delta 35.757Time 2023-05-30T00:17:26




MarioNet saved to ./checkpoints_pytorch/ mario_net_2620000.chkpt at step 2620000
Episode 40000 - Step 2623175 - Epsilon 0.5190298764462736 - Mean Reward 763.82 - Mean Length 180.35 - Mean Loss 1.502 - Mean Q Value 38.588 - Time Delta 42.757Time 2023-05-30T00:18:09




Episode 40000 - Step 2626539 - Epsilon 0.5185935557641388 - Mean Reward 756.81 - Mean Length 183.84 - Mean Loss 1.472 - Mean Q Value 38.96 - Time Delta 35.987Time 2023-05-30T00:18:45




MarioNet saved to ./checkpoints_pytorch/ mario_net_2630000.chkpt at step 2630000
Episode 40000 - Step 2632099 - Epsilon 0.5178732113868076 - Mean Reward 709.28 - Mean Length 200.91 - Mean Loss 1.439 - Mean Q Value 39.385 - Time Delta 59.393Time 2023-05-30T00:19:44




Episode 40000 - Step 2637784 - Epsilon 0.5171377067848487 - Mean Reward 665.66 - Mean Length 220.19 - Mean Loss 1.403 - Mean Q Value 39.21 - Time Delta 60.377Time 2023-05-30T00:20:45




MarioNet saved to ./checkpoints_pytorch/ mario_net_2640000.chkpt at step 2640000
Episode 40000 - Step 2642395 - Epsilon 0.5165419196815563 - Mean Reward 676.55 - Mean Length 232.33 - Mean Loss 1.411 - Mean Q Value 38.57 - Time Delta 48.988Time 2023-05-30T00:21:34




Episode 40000 - Step 2645635 - Epsilon 0.516123690080132 - Mean Reward 670.66 - Mean Length 224.6 - Mean Loss 1.397 - Mean Q Value 37.797 - Time Delta 34.184Time 2023-05-30T00:22:08




Episode 40000 - Step 2649861 - Epsilon 0.5155786932786158 - Mean Reward 678.95 - Mean Length 233.22 - Mean Loss 1.394 - Mean Q Value 36.909 - Time Delta 44.687Time 2023-05-30T00:22:52




MarioNet saved to ./checkpoints_pytorch/ mario_net_2650000.chkpt at step 2650000
Episode 40000 - Step 2652592 - Epsilon 0.5152268020222921 - Mean Reward 688.62 - Mean Length 204.93 - Mean Loss 1.434 - Mean Q Value 35.913 - Time Delta 29.22Time 2023-05-30T00:23:22




Episode 40000 - Step 2656989 - Epsilon 0.5146607500626392 - Mean Reward 723.14 - Mean Length 192.05 - Mean Loss 1.452 - Mean Q Value 35.158 - Time Delta 46.828Time 2023-05-30T00:24:08




MarioNet saved to ./checkpoints_pytorch/ mario_net_2660000.chkpt at step 2660000
Episode 40000 - Step 2660905 - Epsilon 0.5141571436808556 - Mean Reward 728.69 - Mean Length 185.1 - Mean Loss 1.46 - Mean Q Value 34.669 - Time Delta 41.655Time 2023-05-30T00:24:50




Episode 40000 - Step 2663741 - Epsilon 0.5137927354183783 - Mean Reward 712.19 - Mean Length 181.06 - Mean Loss 1.49 - Mean Q Value 34.48 - Time Delta 29.992Time 2023-05-30T00:25:20




Episode 40000 - Step 2666350 - Epsilon 0.5134577233324719 - Mean Reward 681.36 - Mean Length 164.89 - Mean Loss 1.516 - Mean Q Value 34.494 - Time Delta 23.527Time 2023-05-30T00:25:44




Episode 40000 - Step 2669683 - Epsilon 0.5130300628297887 - Mean Reward 696.88 - Mean Length 170.91 - Mean Loss 1.496 - Mean Q Value 34.694 - Time Delta 34.085Time 2023-05-30T00:26:18




MarioNet saved to ./checkpoints_pytorch/ mario_net_2670000.chkpt at step 2670000
Episode 40000 - Step 2673487 - Epsilon 0.5125424030979661 - Mean Reward 690.68 - Mean Length 164.98 - Mean Loss 1.575 - Mean Q Value 35.45 - Time Delta 38.165Time 2023-05-30T00:26:56




Episode 40000 - Step 2677858 - Epsilon 0.5119826282196299 - Mean Reward 675.62 - Mean Length 169.53 - Mean Loss 1.648 - Mean Q Value 36.395 - Time Delta 43.734Time 2023-05-30T00:27:40




MarioNet saved to ./checkpoints_pytorch/ mario_net_2680000.chkpt at step 2680000
Episode 40000 - Step 2682288 - Epsilon 0.5114159212598659 - Mean Reward 712.05 - Mean Length 185.47 - Mean Loss 1.691 - Mean Q Value 37.241 - Time Delta 45.583Time 2023-05-30T00:28:25




Episode 40000 - Step 2687094 - Epsilon 0.5108018239464625 - Mean Reward 771.83 - Mean Length 207.44 - Mean Loss 1.729 - Mean Q Value 37.948 - Time Delta 49.774Time 2023-05-30T00:29:15




MarioNet saved to ./checkpoints_pytorch/ mario_net_2690000.chkpt at step 2690000
Episode 40000 - Step 2690472 - Epsilon 0.5103706338478254 - Mean Reward 747.34 - Mean Length 207.89 - Mean Loss 1.793 - Mean Q Value 38.41 - Time Delta 34.974Time 2023-05-30T00:29:50




Episode 40000 - Step 2694445 - Epsilon 0.5098639598209953 - Mean Reward 763.79 - Mean Length 209.58 - Mean Loss 1.785 - Mean Q Value 38.426 - Time Delta 40.535Time 2023-05-30T00:30:30




Episode 40000 - Step 2697303 - Epsilon 0.5094997920905444 - Mean Reward 748.0 - Mean Length 194.45 - Mean Loss 1.765 - Mean Q Value 38.519 - Time Delta 28.795Time 2023-05-30T00:30:59




MarioNet saved to ./checkpoints_pytorch/ mario_net_2700000.chkpt at step 2700000
Episode 40000 - Step 2701602 - Epsilon 0.5089525012735917 - Mean Reward 747.76 - Mean Length 193.14 - Mean Loss 1.741 - Mean Q Value 38.515 - Time Delta 43.655Time 2023-05-30T00:31:43




Episode 40000 - Step 2704297 - Epsilon 0.5086097099737492 - Mean Reward 696.08 - Mean Length 172.03 - Mean Loss 1.737 - Mean Q Value 38.414 - Time Delta 27.725Time 2023-05-30T00:32:11




Episode 40000 - Step 2708379 - Epsilon 0.5080909384479945 - Mean Reward 719.79 - Mean Length 179.07 - Mean Loss 1.681 - Mean Q Value 38.477 - Time Delta 38.875Time 2023-05-30T00:32:50




MarioNet saved to ./checkpoints_pytorch/ mario_net_2710000.chkpt at step 2710000
Episode 40000 - Step 2711599 - Epsilon 0.5076820897750474 - Mean Reward 688.45 - Mean Length 171.54 - Mean Loss 1.666 - Mean Q Value 38.42 - Time Delta 32.413Time 2023-05-30T00:33:22




Episode 40000 - Step 2715047 - Epsilon 0.5072446563198263 - Mean Reward 712.49 - Mean Length 177.44 - Mean Loss 1.63 - Mean Q Value 38.27 - Time Delta 35.427Time 2023-05-30T00:33:57




Episode 40000 - Step 2718594 - Epsilon 0.506795056435381 - Mean Reward 712.36 - Mean Length 169.92 - Mean Loss 1.61 - Mean Q Value 38.326 - Time Delta 36.475Time 2023-05-30T00:34:34




MarioNet saved to ./checkpoints_pytorch/ mario_net_2720000.chkpt at step 2720000
Episode 40000 - Step 2722157 - Epsilon 0.5063438296773466 - Mean Reward 740.95 - Mean Length 178.6 - Mean Loss 1.596 - Mean Q Value 38.655 - Time Delta 37.19Time 2023-05-30T00:35:11




Episode 40000 - Step 2725559 - Epsilon 0.5059133672764207 - Mean Reward 753.06 - Mean Length 171.8 - Mean Loss 1.61 - Mean Q Value 39.077 - Time Delta 35.255Time 2023-05-30T00:35:46




Episode 40000 - Step 2729749 - Epsilon 0.505383700419365 - Mean Reward 791.47 - Mean Length 181.5 - Mean Loss 1.579 - Mean Q Value 39.772 - Time Delta 42.701Time 2023-05-30T00:36:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_2730000.chkpt at step 2730000
Episode 40000 - Step 2733480 - Epsilon 0.5049125234931359 - Mean Reward 791.17 - Mean Length 184.33 - Mean Loss 1.601 - Mean Q Value 40.422 - Time Delta 37.697Time 2023-05-30T00:37:07




Episode 40000 - Step 2736893 - Epsilon 0.504481890573474 - Mean Reward 785.49 - Mean Length 182.99 - Mean Loss 1.613 - Mean Q Value 40.822 - Time Delta 34.512Time 2023-05-30T00:37:41




MarioNet saved to ./checkpoints_pytorch/ mario_net_2740000.chkpt at step 2740000
Episode 40000 - Step 2740522 - Epsilon 0.5040244068784226 - Mean Reward 785.82 - Mean Length 183.65 - Mean Loss 1.589 - Mean Q Value 41.197 - Time Delta 37.196Time 2023-05-30T00:38:18




Episode 40000 - Step 2743962 - Epsilon 0.5035911321691152 - Mean Reward 780.71 - Mean Length 184.03 - Mean Loss 1.603 - Mean Q Value 41.428 - Time Delta 34.623Time 2023-05-30T00:38:53




Episode 40000 - Step 2747274 - Epsilon 0.5031743312389179 - Mean Reward 757.53 - Mean Length 175.25 - Mean Loss 1.603 - Mean Q Value 41.59 - Time Delta 33.1Time 2023-05-30T00:39:26




Episode 40000 - Step 2749682 - Epsilon 0.5028715114115311 - Mean Reward 722.83 - Mean Length 162.02 - Mean Loss 1.579 - Mean Q Value 41.78 - Time Delta 24.732Time 2023-05-30T00:39:51




MarioNet saved to ./checkpoints_pytorch/ mario_net_2750000.chkpt at step 2750000
Episode 40000 - Step 2753388 - Epsilon 0.5024058166643321 - Mean Reward 684.15 - Mean Length 164.95 - Mean Loss 1.598 - Mean Q Value 41.993 - Time Delta 38.05Time 2023-05-30T00:40:29




Episode 40000 - Step 2757932 - Epsilon 0.5018354076388903 - Mean Reward 727.24 - Mean Length 174.1 - Mean Loss 1.619 - Mean Q Value 42.025 - Time Delta 47.291Time 2023-05-30T00:41:16




MarioNet saved to ./checkpoints_pytorch/ mario_net_2760000.chkpt at step 2760000
Episode 40000 - Step 2761790 - Epsilon 0.5013516206714488 - Mean Reward 733.03 - Mean Length 178.28 - Mean Loss 1.602 - Mean Q Value 42.13 - Time Delta 39.91Time 2023-05-30T00:41:56




Episode 40000 - Step 2764648 - Epsilon 0.5009935328357809 - Mean Reward 717.34 - Mean Length 173.74 - Mean Loss 1.638 - Mean Q Value 42.14 - Time Delta 28.993Time 2023-05-30T00:42:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_2770000.chkpt at step 2770000
Episode 40000 - Step 2770438 - Epsilon 0.5002688692081454 - Mean Reward 771.49 - Mean Length 207.56 - Mean Loss 1.656 - Mean Q Value 41.968 - Time Delta 58.26Time 2023-05-30T00:43:23




Episode 40000 - Step 2773696 - Epsilon 0.49986156605993104 - Mean Reward 790.81 - Mean Length 203.08 - Mean Loss 1.665 - Mean Q Value 41.567 - Time Delta 33.461Time 2023-05-30T00:43:57




Episode 40000 - Step 2776891 - Epsilon 0.4994624609979002 - Mean Reward 748.82 - Mean Length 189.59 - Mean Loss 1.679 - Mean Q Value 41.336 - Time Delta 32.384Time 2023-05-30T00:44:29




Episode 40000 - Step 2779542 - Epsilon 0.4991315518775843 - Mean Reward 716.52 - Mean Length 177.52 - Mean Loss 1.693 - Mean Q Value 40.969 - Time Delta 26.703Time 2023-05-30T00:44:56




MarioNet saved to ./checkpoints_pytorch/ mario_net_2780000.chkpt at step 2780000
Episode 40000 - Step 2782849 - Epsilon 0.49871906535056404 - Mean Reward 739.19 - Mean Length 182.01 - Mean Loss 1.716 - Mean Q Value 40.805 - Time Delta 33.203Time 2023-05-30T00:45:29




Episode 40000 - Step 2785911 - Epsilon 0.49833744194327695 - Mean Reward 707.4 - Mean Length 154.73 - Mean Loss 1.748 - Mean Q Value 40.703 - Time Delta 31.574Time 2023-05-30T00:46:01




Episode 40000 - Step 2789708 - Epsilon 0.4978646195161925 - Mean Reward 723.03 - Mean Length 160.12 - Mean Loss 1.751 - Mean Q Value 40.921 - Time Delta 39.295Time 2023-05-30T00:46:40




MarioNet saved to ./checkpoints_pytorch/ mario_net_2790000.chkpt at step 2790000
Episode 40000 - Step 2792915 - Epsilon 0.4974656164792799 - Mean Reward 686.39 - Mean Length 160.24 - Mean Loss 1.795 - Mean Q Value 40.969 - Time Delta 33.267Time 2023-05-30T00:47:13




Episode 40000 - Step 2796171 - Epsilon 0.49706084418148855 - Mean Reward 696.46 - Mean Length 166.29 - Mean Loss 1.819 - Mean Q Value 41.2 - Time Delta 34.437Time 2023-05-30T00:47:48




MarioNet saved to ./checkpoints_pytorch/ mario_net_2800000.chkpt at step 2800000
Episode 40000 - Step 2800236 - Epsilon 0.49655596262180246 - Mean Reward 665.3 - Mean Length 173.87 - Mean Loss 1.817 - Mean Q Value 41.197 - Time Delta 43.141Time 2023-05-30T00:48:31




Episode 40000 - Step 2803821 - Epsilon 0.49611112365787274 - Mean Reward 679.78 - Mean Length 179.1 - Mean Loss 1.847 - Mean Q Value 40.946 - Time Delta 38.126Time 2023-05-30T00:49:09




Episode 40000 - Step 2807062 - Epsilon 0.49570931237542465 - Mean Reward 673.37 - Mean Length 173.54 - Mean Loss 1.853 - Mean Q Value 40.774 - Time Delta 34.175Time 2023-05-30T00:49:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_2810000.chkpt at step 2810000
Episode 40000 - Step 2811050 - Epsilon 0.49521533641710413 - Mean Reward 726.42 - Mean Length 181.35 - Mean Loss 1.843 - Mean Q Value 40.862 - Time Delta 42.255Time 2023-05-30T00:50:25




Episode 40000 - Step 2815096 - Epsilon 0.4947146792916927 - Mean Reward 749.21 - Mean Length 189.25 - Mean Loss 1.859 - Mean Q Value 40.9 - Time Delta 42.739Time 2023-05-30T00:51:08




MarioNet saved to ./checkpoints_pytorch/ mario_net_2820000.chkpt at step 2820000
Episode 40000 - Step 2820458 - Epsilon 0.4940519584690404 - Mean Reward 775.11 - Mean Length 202.22 - Mean Loss 1.855 - Mean Q Value 40.575 - Time Delta 55.852Time 2023-05-30T00:52:04




Episode 40000 - Step 2823578 - Epsilon 0.4936667481447865 - Mean Reward 766.55 - Mean Length 197.57 - Mean Loss 1.813 - Mean Q Value 40.375 - Time Delta 33.383Time 2023-05-30T00:52:37




Episode 40000 - Step 2827169 - Epsilon 0.49322375764331716 - Mean Reward 755.16 - Mean Length 201.07 - Mean Loss 1.779 - Mean Q Value 40.067 - Time Delta 38.125Time 2023-05-30T00:53:15




MarioNet saved to ./checkpoints_pytorch/ mario_net_2830000.chkpt at step 2830000
Episode 40000 - Step 2831099 - Epsilon 0.49273940321889564 - Mean Reward 747.59 - Mean Length 200.49 - Mean Loss 1.74 - Mean Q Value 39.526 - Time Delta 41.527Time 2023-05-30T00:53:57




Episode 40000 - Step 2834540 - Epsilon 0.49231570636299476 - Mean Reward 753.34 - Mean Length 194.44 - Mean Loss 1.693 - Mean Q Value 39.036 - Time Delta 36.74Time 2023-05-30T00:54:34




Episode 40000 - Step 2838648 - Epsilon 0.491810357610323 - Mean Reward 765.52 - Mean Length 181.9 - Mean Loss 1.655 - Mean Q Value 39.038 - Time Delta 43.383Time 2023-05-30T00:55:17




MarioNet saved to ./checkpoints_pytorch/ mario_net_2840000.chkpt at step 2840000
Episode 40000 - Step 2841338 - Epsilon 0.49147972629069764 - Mean Reward 746.93 - Mean Length 177.6 - Mean Loss 1.65 - Mean Q Value 39.339 - Time Delta 28.685Time 2023-05-30T00:55:46




Episode 40000 - Step 2845131 - Epsilon 0.49101390147604274 - Mean Reward 750.37 - Mean Length 179.62 - Mean Loss 1.652 - Mean Q Value 39.695 - Time Delta 40.208Time 2023-05-30T00:56:26




Episode 40000 - Step 2848154 - Epsilon 0.490642957861557 - Mean Reward 717.06 - Mean Length 170.55 - Mean Loss 1.627 - Mean Q Value 40.04 - Time Delta 32.139Time 2023-05-30T00:56:58




MarioNet saved to ./checkpoints_pytorch/ mario_net_2850000.chkpt at step 2850000
Episode 40000 - Step 2851464 - Epsilon 0.49023711870225156 - Mean Reward 692.78 - Mean Length 169.24 - Mean Loss 1.608 - Mean Q Value 40.146 - Time Delta 34.516Time 2023-05-30T00:57:33




Episode 40000 - Step 2855841 - Epsilon 0.48970097006184043 - Mean Reward 673.2 - Mean Length 171.93 - Mean Loss 1.606 - Mean Q Value 40.35 - Time Delta 46.34Time 2023-05-30T00:58:19




Episode 40000 - Step 2859910 - Epsilon 0.48920307497256443 - Mean Reward 707.63 - Mean Length 185.72 - Mean Loss 1.59 - Mean Q Value 40.441 - Time Delta 43.767Time 2023-05-30T00:59:03




MarioNet saved to ./checkpoints_pytorch/ mario_net_2860000.chkpt at step 2860000
Episode 40000 - Step 2863996 - Epsilon 0.48870360911459354 - Mean Reward 711.89 - Mean Length 188.65 - Mean Loss 1.635 - Mean Q Value 40.478 - Time Delta 43.355Time 2023-05-30T00:59:46




Episode 40000 - Step 2868781 - Epsilon 0.48811934688059544 - Mean Reward 747.52 - Mean Length 206.27 - Mean Loss 1.67 - Mean Q Value 40.331 - Time Delta 51.294Time 2023-05-30T01:00:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_2870000.chkpt at step 2870000
Episode 40000 - Step 2872225 - Epsilon 0.4876992569450306 - Mean Reward 766.82 - Mean Length 207.61 - Mean Loss 1.709 - Mean Q Value 40.539 - Time Delta 36.405Time 2023-05-30T01:01:14




Episode 40000 - Step 2876551 - Epsilon 0.48717209524674826 - Mean Reward 783.04 - Mean Length 207.1 - Mean Loss 1.735 - Mean Q Value 40.39 - Time Delta 45.763Time 2023-05-30T01:02:00




MarioNet saved to ./checkpoints_pytorch/ mario_net_2880000.chkpt at step 2880000
Episode 40000 - Step 2881990 - Epsilon 0.4865101130745224 - Mean Reward 805.2 - Mean Length 220.8 - Mean Loss 1.751 - Mean Q Value 39.995 - Time Delta 57.795Time 2023-05-30T01:02:57




Episode 40000 - Step 2886076 - Epsilon 0.4860133966729504 - Mean Reward 817.1 - Mean Length 220.8 - Mean Loss 1.726 - Mean Q Value 39.445 - Time Delta 42.472Time 2023-05-30T01:03:40




Episode 40000 - Step 2889421 - Epsilon 0.48560713780980386 - Mean Reward 792.33 - Mean Length 206.4 - Mean Loss 1.713 - Mean Q Value 38.971 - Time Delta 35.358Time 2023-05-30T01:04:15




MarioNet saved to ./checkpoints_pytorch/ mario_net_2890000.chkpt at step 2890000
Episode 40000 - Step 2892835 - Epsilon 0.4851928488887864 - Mean Reward 782.23 - Mean Length 206.1 - Mean Loss 1.728 - Mean Q Value 38.383 - Time Delta 36.661Time 2023-05-30T01:04:52




Episode 40000 - Step 2896118 - Epsilon 0.48479479018393334 - Mean Reward 769.1 - Mean Length 195.67 - Mean Loss 1.698 - Mean Q Value 38.054 - Time Delta 34.672Time 2023-05-30T01:05:27




Episode 40000 - Step 2899165 - Epsilon 0.4844256383248798 - Mean Reward 723.96 - Mean Length 171.75 - Mean Loss 1.681 - Mean Q Value 38.033 - Time Delta 32.514Time 2023-05-30T01:05:59




MarioNet saved to ./checkpoints_pytorch/ mario_net_2900000.chkpt at step 2900000
Episode 40000 - Step 2903455 - Epsilon 0.4839063702700649 - Mean Reward 739.67 - Mean Length 173.79 - Mean Loss 1.653 - Mean Q Value 38.149 - Time Delta 45.593Time 2023-05-30T01:06:45




Episode 40000 - Step 2906497 - Epsilon 0.48353849933009435 - Mean Reward 717.3 - Mean Length 170.76 - Mean Loss 1.63 - Mean Q Value 38.589 - Time Delta 32.162Time 2023-05-30T01:07:17




Episode 40000 - Step 2909545 - Epsilon 0.4831701832937458 - Mean Reward 715.09 - Mean Length 167.1 - Mean Loss 1.578 - Mean Q Value 39.083 - Time Delta 32.233Time 2023-05-30T01:07:49




MarioNet saved to ./checkpoints_pytorch/ mario_net_2910000.chkpt at step 2910000
Episode 40000 - Step 2915505 - Epsilon 0.4824507957074586 - Mean Reward 717.65 - Mean Length 193.87 - Mean Loss 1.583 - Mean Q Value 39.366 - Time Delta 62.498Time 2023-05-30T01:08:52




Episode 40000 - Step 2919644 - Epsilon 0.48195183787698975 - Mean Reward 754.25 - Mean Length 204.79 - Mean Loss 1.55 - Mean Q Value 39.221 - Time Delta 43.323Time 2023-05-30T01:09:35




MarioNet saved to ./checkpoints_pytorch/ mario_net_2920000.chkpt at step 2920000
Episode 40000 - Step 2925306 - Epsilon 0.48127011756654264 - Mean Reward 738.26 - Mean Length 218.51 - Mean Loss 1.509 - Mean Q Value 39.059 - Time Delta 59.83Time 2023-05-30T01:10:35




Episode 40000 - Step 2928580 - Epsilon 0.48087635909367593 - Mean Reward 753.49 - Mean Length 220.83 - Mean Loss 1.476 - Mean Q Value 38.676 - Time Delta 34.826Time 2023-05-30T01:11:10




MarioNet saved to ./checkpoints_pytorch/ mario_net_2930000.chkpt at step 2930000
Episode 40000 - Step 2932892 - Epsilon 0.48035825362276086 - Mean Reward 756.66 - Mean Length 233.47 - Mean Loss 1.442 - Mean Q Value 38.175 - Time Delta 45.779Time 2023-05-30T01:11:55




Episode 40000 - Step 2938556 - Epsilon 0.47967854759601136 - Mean Reward 748.28 - Mean Length 230.51 - Mean Loss 1.388 - Mean Q Value 37.397 - Time Delta 59.67Time 2023-05-30T01:12:55




MarioNet saved to ./checkpoints_pytorch/ mario_net_2940000.chkpt at step 2940000
Episode 40000 - Step 2943767 - Epsilon 0.47905405315836824 - Mean Reward 698.85 - Mean Length 241.23 - Mean Loss 1.352 - Mean Q Value 36.564 - Time Delta 54.842Time 2023-05-30T01:13:50




MarioNet saved to ./checkpoints_pytorch/ mario_net_2950000.chkpt at step 2950000
Episode 40000 - Step 2950177 - Epsilon 0.478286983720733 - Mean Reward 688.59 - Mean Length 248.71 - Mean Loss 1.291 - Mean Q Value 35.248 - Time Delta 67.64Time 2023-05-30T01:14:57




Episode 40000 - Step 2954178 - Epsilon 0.4778088162887822 - Mean Reward 714.22 - Mean Length 255.98 - Mean Loss 1.238 - Mean Q Value 33.608 - Time Delta 42.056Time 2023-05-30T01:15:39




Episode 40000 - Step 2959957 - Epsilon 0.47711900034090876 - Mean Reward 687.02 - Mean Length 270.65 - Mean Loss 1.178 - Mean Q Value 32.056 - Time Delta 61.361Time 2023-05-30T01:16:41




MarioNet saved to ./checkpoints_pytorch/ mario_net_2960000.chkpt at step 2960000
Episode 40000 - Step 2962793 - Epsilon 0.47678084281835736 - Mean Reward 673.77 - Mean Length 242.37 - Mean Loss 1.159 - Mean Q Value 30.776 - Time Delta 30.327Time 2023-05-30T01:17:11




Episode 40000 - Step 2965475 - Epsilon 0.47646126837256186 - Mean Reward 667.73 - Mean Length 217.08 - Mean Loss 1.124 - Mean Q Value 30.094 - Time Delta 28.229Time 2023-05-30T01:17:39




Episode 40000 - Step 2968598 - Epsilon 0.47608941637135976 - Mean Reward 651.85 - Mean Length 184.21 - Mean Loss 1.125 - Mean Q Value 30.049 - Time Delta 33.325Time 2023-05-30T01:18:13




MarioNet saved to ./checkpoints_pytorch/ mario_net_2970000.chkpt at step 2970000
Episode 40000 - Step 2972632 - Epsilon 0.4756095221622254 - Mean Reward 649.12 - Mean Length 184.54 - Mean Loss 1.148 - Mean Q Value 30.371 - Time Delta 43.036Time 2023-05-30T01:18:56




Episode 40000 - Step 2976486 - Epsilon 0.4751514930212738 - Mean Reward 689.94 - Mean Length 165.29 - Mean Loss 1.178 - Mean Q Value 30.787 - Time Delta 40.631Time 2023-05-30T01:19:36




Episode 40000 - Step 2979998 - Epsilon 0.4747344930479944 - Mean Reward 681.33 - Mean Length 172.05 - Mean Loss 1.18 - Mean Q Value 31.656 - Time Delta 37.291Time 2023-05-30T01:20:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_2980000.chkpt at step 2980000
Episode 40000 - Step 2984409 - Epsilon 0.47421126806650765 - Mean Reward 698.41 - Mean Length 189.34 - Mean Loss 1.272 - Mean Q Value 32.281 - Time Delta 46.928Time 2023-05-30T01:21:01




Episode 40000 - Step 2987896 - Epsilon 0.4737980544775031 - Mean Reward 689.03 - Mean Length 192.98 - Mean Loss 1.34 - Mean Q Value 32.857 - Time Delta 36.368Time 2023-05-30T01:21:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_2990000.chkpt at step 2990000
Episode 40000 - Step 2991965 - Epsilon 0.47331632840576665 - Mean Reward 685.98 - Mean Length 193.33 - Mean Loss 1.387 - Mean Q Value 33.686 - Time Delta 43.406Time 2023-05-30T01:22:20




Episode 40000 - Step 2995117 - Epsilon 0.4729435020052213 - Mean Reward 660.14 - Mean Length 186.31 - Mean Loss 1.415 - Mean Q Value 34.281 - Time Delta 33.859Time 2023-05-30T01:22:54




Episode 40000 - Step 2999744 - Epsilon 0.47239674083481537 - Mean Reward 704.11 - Mean Length 197.46 - Mean Loss 1.457 - Mean Q Value 34.843 - Time Delta 48.769Time 2023-05-30T01:23:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_3000000.chkpt at step 3000000
Episode 40000 - Step 3002837 - Epsilon 0.47203160119957643 - Mean Reward 700.88 - Mean Length 184.28 - Mean Loss 1.458 - Mean Q Value 35.603 - Time Delta 32.878Time 2023-05-30T01:24:16




Episode 40000 - Step 3007506 - Epsilon 0.4714809436841797 - Mean Reward 720.86 - Mean Length 196.1 - Mean Loss 1.437 - Mean Q Value 36.205 - Time Delta 49.851Time 2023-05-30T01:25:06




MarioNet saved to ./checkpoints_pytorch/ mario_net_3010000.chkpt at step 3010000
Episode 40000 - Step 3011295 - Epsilon 0.47103454476348006 - Mean Reward 710.05 - Mean Length 193.3 - Mean Loss 1.432 - Mean Q Value 36.28 - Time Delta 40.193Time 2023-05-30T01:25:46




Episode 40000 - Step 3013814 - Epsilon 0.4707380041048463 - Mean Reward 675.08 - Mean Length 186.97 - Mean Loss 1.462 - Mean Q Value 36.38 - Time Delta 22.755Time 2023-05-30T01:26:09




Episode 40000 - Step 3016965 - Epsilon 0.4703673262156491 - Mean Reward 629.25 - Mean Length 172.21 - Mean Loss 1.472 - Mean Q Value 36.151 - Time Delta 33.597Time 2023-05-30T01:26:42




MarioNet saved to ./checkpoints_pytorch/ mario_net_3020000.chkpt at step 3020000
Episode 40000 - Step 3022304 - Epsilon 0.46973992215540056 - Mean Reward 653.67 - Mean Length 194.67 - Mean Loss 1.453 - Mean Q Value 35.561 - Time Delta 56.077Time 2023-05-30T01:27:38




Episode 40000 - Step 3026479 - Epsilon 0.4692498868320002 - Mean Reward 687.79 - Mean Length 189.73 - Mean Loss 1.497 - Mean Q Value 35.12 - Time Delta 44.677Time 2023-05-30T01:28:23




Episode 40000 - Step 3029886 - Epsilon 0.4688503733578516 - Mean Reward 704.05 - Mean Length 185.91 - Mean Loss 1.505 - Mean Q Value 35.347 - Time Delta 36.23Time 2023-05-30T01:28:59




MarioNet saved to ./checkpoints_pytorch/ mario_net_3030000.chkpt at step 3030000
Episode 40000 - Step 3034092 - Epsilon 0.4683576362305497 - Mean Reward 766.13 - Mean Length 202.78 - Mean Loss 1.517 - Mean Q Value 35.647 - Time Delta 44.341Time 2023-05-30T01:29:44




Episode 40000 - Step 3037332 - Epsilon 0.4679784201010152 - Mean Reward 793.32 - Mean Length 203.67 - Mean Loss 1.54 - Mean Q Value 36.002 - Time Delta 34.66Time 2023-05-30T01:30:18




MarioNet saved to ./checkpoints_pytorch/ mario_net_3040000.chkpt at step 3040000
Episode 40000 - Step 3040734 - Epsilon 0.46758057361313343 - Mean Reward 781.42 - Mean Length 184.3 - Mean Loss 1.562 - Mean Q Value 36.583 - Time Delta 36.212Time 2023-05-30T01:30:54




Episode 40000 - Step 3043995 - Epsilon 0.4671995338452517 - Mean Reward 743.62 - Mean Length 175.16 - Mean Loss 1.587 - Mean Q Value 37.211 - Time Delta 34.482Time 2023-05-30T01:31:29




Episode 40000 - Step 3047877 - Epsilon 0.4667463365904823 - Mean Reward 739.47 - Mean Length 179.91 - Mean Loss 1.614 - Mean Q Value 37.518 - Time Delta 41.385Time 2023-05-30T01:32:10




MarioNet saved to ./checkpoints_pytorch/ mario_net_3050000.chkpt at step 3050000
Episode 40000 - Step 3051593 - Epsilon 0.46631293053783496 - Mean Reward 727.48 - Mean Length 175.01 - Mean Loss 1.629 - Mean Q Value 37.916 - Time Delta 39.353Time 2023-05-30T01:32:50




Episode 40000 - Step 3055339 - Epsilon 0.4658764328457088 - Mean Reward 722.05 - Mean Length 180.07 - Mean Loss 1.646 - Mean Q Value 38.466 - Time Delta 38.902Time 2023-05-30T01:33:29




Episode 40000 - Step 3059391 - Epsilon 0.46540473891352185 - Mean Reward 749.63 - Mean Length 186.57 - Mean Loss 1.691 - Mean Q Value 38.948 - Time Delta 43.288Time 2023-05-30T01:34:12




MarioNet saved to ./checkpoints_pytorch/ mario_net_3060000.chkpt at step 3060000
Episode 40000 - Step 3062973 - Epsilon 0.46498815547076866 - Mean Reward 774.17 - Mean Length 189.78 - Mean Loss 1.731 - Mean Q Value 39.398 - Time Delta 38.56Time 2023-05-30T01:34:50




Episode 40000 - Step 3066121 - Epsilon 0.46462235370851 - Mean Reward 761.86 - Mean Length 182.44 - Mean Loss 1.766 - Mean Q Value 39.908 - Time Delta 33.188Time 2023-05-30T01:35:24




MarioNet saved to ./checkpoints_pytorch/ mario_net_3070000.chkpt at step 3070000
Episode 40000 - Step 3070017 - Epsilon 0.46417003179592453 - Mean Reward 761.06 - Mean Length 184.24 - Mean Loss 1.77 - Mean Q Value 40.165 - Time Delta 41.793Time 2023-05-30T01:36:05




Episode 40000 - Step 3072624 - Epsilon 0.46386760750307693 - Mean Reward 752.78 - Mean Length 172.85 - Mean Loss 1.797 - Mean Q Value 40.286 - Time Delta 27.897Time 2023-05-30T01:36:33




Episode 40000 - Step 3075751 - Epsilon 0.46350512066105565 - Mean Reward 729.28 - Mean Length 163.6 - Mean Loss 1.794 - Mean Q Value 40.524 - Time Delta 33.303Time 2023-05-30T01:37:07




Episode 40000 - Step 3079538 - Epsilon 0.4630665047957291 - Mean Reward 727.27 - Mean Length 165.65 - Mean Loss 1.781 - Mean Q Value 40.637 - Time Delta 40.013Time 2023-05-30T01:37:47




MarioNet saved to ./checkpoints_pytorch/ mario_net_3080000.chkpt at step 3080000
Episode 40000 - Step 3083183 - Epsilon 0.4626447275918785 - Mean Reward 707.45 - Mean Length 170.62 - Mean Loss 1.79 - Mean Q Value 40.653 - Time Delta 38.669Time 2023-05-30T01:38:25




Episode 40000 - Step 3087779 - Epsilon 0.4621134540085145 - Mean Reward 756.46 - Mean Length 177.62 - Mean Loss 1.763 - Mean Q Value 40.807 - Time Delta 48.165Time 2023-05-30T01:39:13




MarioNet saved to ./checkpoints_pytorch/ mario_net_3090000.chkpt at step 3090000
Episode 40000 - Step 3092646 - Epsilon 0.4615515193293682 - Mean Reward 795.49 - Mean Length 200.22 - Mean Loss 1.735 - Mean Q Value 41.012 - Time Delta 51.271Time 2023-05-30T01:40:05




Episode 40000 - Step 3095368 - Epsilon 0.4612375403246461 - Mean Reward 779.52 - Mean Length 196.17 - Mean Loss 1.701 - Mean Q Value 41.088 - Time Delta 29.067Time 2023-05-30T01:40:34




Episode 40000 - Step 3099886 - Epsilon 0.460716866563555 - Mean Reward 790.61 - Mean Length 203.48 - Mean Loss 1.653 - Mean Q Value 41.276 - Time Delta 48.026Time 2023-05-30T01:41:22




MarioNet saved to ./checkpoints_pytorch/ mario_net_3100000.chkpt at step 3100000
Episode 40000 - Step 3103597 - Epsilon 0.46028963464998424 - Mean Reward 811.13 - Mean Length 204.14 - Mean Loss 1.615 - Mean Q Value 41.428 - Time Delta 39.573Time 2023-05-30T01:42:01




Episode 40000 - Step 3107523 - Epsilon 0.4598380819528044 - Mean Reward 785.42 - Mean Length 197.44 - Mean Loss 1.623 - Mean Q Value 41.524 - Time Delta 41.944Time 2023-05-30T01:42:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_3110000.chkpt at step 3110000
Episode 40000 - Step 3110212 - Epsilon 0.4595290596453023 - Mean Reward 735.84 - Mean Length 175.66 - Mean Loss 1.615 - Mean Q Value 41.641 - Time Delta 28.706Time 2023-05-30T01:43:12




Episode 40000 - Step 3113726 - Epsilon 0.4591255405876028 - Mean Reward 752.52 - Mean Length 183.58 - Mean Loss 1.65 - Mean Q Value 41.826 - Time Delta 37.185Time 2023-05-30T01:43:49




Episode 40000 - Step 3117326 - Epsilon 0.4587125134394834 - Mean Reward 739.15 - Mean Length 174.4 - Mean Loss 1.67 - Mean Q Value 41.813 - Time Delta 38.218Time 2023-05-30T01:44:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_3120000.chkpt at step 3120000
Episode 40000 - Step 3121227 - Epsilon 0.45826537207754475 - Mean Reward 767.81 - Mean Length 176.3 - Mean Loss 1.686 - Mean Q Value 41.736 - Time Delta 41.263Time 2023-05-30T01:45:09




Episode 40000 - Step 3125597 - Epsilon 0.4577649904791493 - Mean Reward 719.84 - Mean Length 180.74 - Mean Loss 1.679 - Mean Q Value 41.521 - Time Delta 45.915Time 2023-05-30T01:45:55




Episode 40000 - Step 3129553 - Epsilon 0.45731248464795776 - Mean Reward 755.67 - Mean Length 193.41 - Mean Loss 1.654 - Mean Q Value 41.187 - Time Delta 42.247Time 2023-05-30T01:46:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_3130000.chkpt at step 3130000
Episode 40000 - Step 3133946 - Epsilon 0.45681051684240276 - Mean Reward 783.44 - Mean Length 202.2 - Mean Loss 1.617 - Mean Q Value 40.859 - Time Delta 47.038Time 2023-05-30T01:47:24




Episode 40000 - Step 3137176 - Epsilon 0.45644179119692574 - Mean Reward 781.51 - Mean Length 198.5 - Mean Loss 1.599 - Mean Q Value 40.708 - Time Delta 34.375Time 2023-05-30T01:47:58




MarioNet saved to ./checkpoints_pytorch/ mario_net_3140000.chkpt at step 3140000
Episode 40000 - Step 3141830 - Epsilon 0.4559110299366875 - Mean Reward 771.23 - Mean Length 206.03 - Mean Loss 1.544 - Mean Q Value 40.58 - Time Delta 49.762Time 2023-05-30T01:48:48




Episode 40000 - Step 3146580 - Epsilon 0.45536995684665715 - Mean Reward 825.18 - Mean Length 209.83 - Mean Loss 1.537 - Mean Q Value 40.533 - Time Delta 50.119Time 2023-05-30T01:49:38




MarioNet saved to ./checkpoints_pytorch/ mario_net_3150000.chkpt at step 3150000
Episode 40000 - Step 3150704 - Epsilon 0.4549007122995573 - Mean Reward 837.73 - Mean Length 211.51 - Mean Loss 1.531 - Mean Q Value 40.418 - Time Delta 44.021Time 2023-05-30T01:50:22




Episode 40000 - Step 3154538 - Epsilon 0.4544648988092635 - Mean Reward 835.8 - Mean Length 205.92 - Mean Loss 1.505 - Mean Q Value 40.12 - Time Delta 40.838Time 2023-05-30T01:51:03




Episode 40000 - Step 3157427 - Epsilon 0.4541367800012055 - Mean Reward 808.47 - Mean Length 202.51 - Mean Loss 1.486 - Mean Q Value 39.936 - Time Delta 30.318Time 2023-05-30T01:51:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_3160000.chkpt at step 3160000
Episode 40000 - Step 3161588 - Epsilon 0.4536646097868348 - Mean Reward 792.56 - Mean Length 197.58 - Mean Loss 1.497 - Mean Q Value 39.722 - Time Delta 44.439Time 2023-05-30T01:52:18




Episode 40000 - Step 3165131 - Epsilon 0.4532629542179232 - Mean Reward 764.44 - Mean Length 185.51 - Mean Loss 1.504 - Mean Q Value 39.721 - Time Delta 37.974Time 2023-05-30T01:52:56




Episode 40000 - Step 3167101 - Epsilon 0.45303977714690763 - Mean Reward 697.02 - Mean Length 163.97 - Mean Loss 1.52 - Mean Q Value 40.007 - Time Delta 20.811Time 2023-05-30T01:53:17




MarioNet saved to ./checkpoints_pytorch/ mario_net_3170000.chkpt at step 3170000
Episode 40000 - Step 3170601 - Epsilon 0.4526435406710588 - Mean Reward 679.53 - Mean Length 160.63 - Mean Loss 1.556 - Mean Q Value 40.412 - Time Delta 37.142Time 2023-05-30T01:53:54




Episode 40000 - Step 3174481 - Epsilon 0.4522046892590346 - Mean Reward 703.43 - Mean Length 170.54 - Mean Loss 1.591 - Mean Q Value 40.664 - Time Delta 41.297Time 2023-05-30T01:54:35




Episode 40000 - Step 3178347 - Epsilon 0.45176784451143004 - Mean Reward 695.43 - Mean Length 167.59 - Mean Loss 1.624 - Mean Q Value 40.896 - Time Delta 41.01Time 2023-05-30T01:55:16




MarioNet saved to ./checkpoints_pytorch/ mario_net_3180000.chkpt at step 3180000
Episode 40000 - Step 3181921 - Epsilon 0.45136437017087344 - Mean Reward 706.31 - Mean Length 167.9 - Mean Loss 1.64 - Mean Q Value 40.994 - Time Delta 37.799Time 2023-05-30T01:55:54




Episode 40000 - Step 3185503 - Epsilon 0.4509603542519532 - Mean Reward 738.79 - Mean Length 184.02 - Mean Loss 1.673 - Mean Q Value 41.13 - Time Delta 37.876Time 2023-05-30T01:56:32




MarioNet saved to ./checkpoints_pytorch/ mario_net_3190000.chkpt at step 3190000
Episode 40000 - Step 3190373 - Epsilon 0.45041164404710304 - Mean Reward 744.16 - Mean Length 197.72 - Mean Loss 1.7 - Mean Q Value 41.116 - Time Delta 51.089Time 2023-05-30T01:57:23




Episode 40000 - Step 3194312 - Epsilon 0.44996831944292903 - Mean Reward 767.25 - Mean Length 198.31 - Mean Loss 1.729 - Mean Q Value 41.054 - Time Delta 41.741Time 2023-05-30T01:58:05




Episode 40000 - Step 3198543 - Epsilon 0.449492617024979 - Mean Reward 789.58 - Mean Length 201.96 - Mean Loss 1.738 - Mean Q Value 41.128 - Time Delta 45.059Time 2023-05-30T01:58:50




MarioNet saved to ./checkpoints_pytorch/ mario_net_3200000.chkpt at step 3200000
Episode 40000 - Step 3202584 - Epsilon 0.44903874635185886 - Mean Reward 774.69 - Mean Length 206.63 - Mean Loss 1.784 - Mean Q Value 41.091 - Time Delta 42.701Time 2023-05-30T01:59:32




Episode 40000 - Step 3206310 - Epsilon 0.4486206614605816 - Mean Reward 782.15 - Mean Length 208.07 - Mean Loss 1.772 - Mean Q Value 40.889 - Time Delta 39.917Time 2023-05-30T02:00:12




MarioNet saved to ./checkpoints_pytorch/ mario_net_3210000.chkpt at step 3210000
Episode 40000 - Step 3210500 - Epsilon 0.44815097729895215 - Mean Reward 780.4 - Mean Length 201.27 - Mean Loss 1.789 - Mean Q Value 40.619 - Time Delta 44.918Time 2023-05-30T02:00:57




Episode 40000 - Step 3213040 - Epsilon 0.4478664917264951 - Mean Reward 740.26 - Mean Length 187.28 - Mean Loss 1.805 - Mean Q Value 40.452 - Time Delta 26.847Time 2023-05-30T02:01:24




Episode 40000 - Step 3218574 - Epsilon 0.4472472967845712 - Mean Reward 756.29 - Mean Length 200.31 - Mean Loss 1.798 - Mean Q Value 40.282 - Time Delta 58.98Time 2023-05-30T02:02:23




MarioNet saved to ./checkpoints_pytorch/ mario_net_3220000.chkpt at step 3220000
Episode 40000 - Step 3222919 - Epsilon 0.44676173811447206 - Mean Reward 763.91 - Mean Length 203.35 - Mean Loss 1.769 - Mean Q Value 39.989 - Time Delta 45.946Time 2023-05-30T02:03:09




Episode 40000 - Step 3226911 - Epsilon 0.4463160922583385 - Mean Reward 761.11 - Mean Length 206.01 - Mean Loss 1.738 - Mean Q Value 39.652 - Time Delta 41.796Time 2023-05-30T02:03:51




MarioNet saved to ./checkpoints_pytorch/ mario_net_3230000.chkpt at step 3230000
Episode 40000 - Step 3233063 - Epsilon 0.4456301856200224 - Mean Reward 799.49 - Mean Length 225.63 - Mean Loss 1.671 - Mean Q Value 39.314 - Time Delta 65.379Time 2023-05-30T02:04:56




Episode 40000 - Step 3236499 - Epsilon 0.4452475536066703 - Mean Reward 829.13 - Mean Length 234.59 - Mean Loss 1.603 - Mean Q Value 38.841 - Time Delta 36.23Time 2023-05-30T02:05:32




Episode 40000 - Step 3239993 - Epsilon 0.44485879963296293 - Mean Reward 815.36 - Mean Length 214.19 - Mean Loss 1.545 - Mean Q Value 38.511 - Time Delta 37.604Time 2023-05-30T02:06:10




MarioNet saved to ./checkpoints_pytorch/ mario_net_3240000.chkpt at step 3240000
Episode 40000 - Step 3243449 - Epsilon 0.444474607576871 - Mean Reward 833.71 - Mean Length 205.3 - Mean Loss 1.485 - Mean Q Value 38.56 - Time Delta 37.008Time 2023-05-30T02:06:47




Episode 40000 - Step 3247192 - Epsilon 0.4440588849473666 - Mean Reward 863.1 - Mean Length 202.81 - Mean Loss 1.441 - Mean Q Value 38.82 - Time Delta 39.669Time 2023-05-30T02:07:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_3250000.chkpt at step 3250000
Episode 40000 - Step 3251904 - Epsilon 0.44353609150127254 - Mean Reward 822.73 - Mean Length 188.41 - Mean Loss 1.425 - Mean Q Value 39.201 - Time Delta 49.743Time 2023-05-30T02:08:16




Episode 40000 - Step 3256157 - Epsilon 0.44306475231310083 - Mean Reward 822.74 - Mean Length 196.58 - Mean Loss 1.393 - Mean Q Value 39.469 - Time Delta 45.038Time 2023-05-30T02:09:01




Episode 40000 - Step 3258871 - Epsilon 0.4427642348031432 - Mean Reward 791.22 - Mean Length 188.78 - Mean Loss 1.377 - Mean Q Value 39.685 - Time Delta 28.081Time 2023-05-30T02:09:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_3260000.chkpt at step 3260000
Episode 40000 - Step 3261317 - Epsilon 0.44249356720474275 - Mean Reward 743.26 - Mean Length 178.68 - Mean Loss 1.37 - Mean Q Value 40.048 - Time Delta 26.035Time 2023-05-30T02:09:56




Episode 40000 - Step 3266795 - Epsilon 0.4418879869544002 - Mean Reward 770.92 - Mean Length 196.03 - Mean Loss 1.39 - Mean Q Value 40.38 - Time Delta 58.253Time 2023-05-30T02:10:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_3270000.chkpt at step 3270000
Episode 40000 - Step 3271096 - Epsilon 0.44141310219267543 - Mean Reward 767.86 - Mean Length 191.92 - Mean Loss 1.408 - Mean Q Value 40.742 - Time Delta 46.189Time 2023-05-30T02:11:40




Episode 40000 - Step 3273785 - Epsilon 0.4411164619169895 - Mean Reward 738.83 - Mean Length 176.28 - Mean Loss 1.448 - Mean Q Value 41.486 - Time Delta 29.061Time 2023-05-30T02:12:09




Episode 40000 - Step 3276645 - Epsilon 0.4408011763352917 - Mean Reward 747.6 - Mean Length 177.74 - Mean Loss 1.476 - Mean Q Value 42.116 - Time Delta 30.577Time 2023-05-30T02:12:40




Episode 40000 - Step 3279678 - Epsilon 0.44046706548730713 - Mean Reward 773.97 - Mean Length 183.61 - Mean Loss 1.488 - Mean Q Value 42.531 - Time Delta 32.18Time 2023-05-30T02:13:12




MarioNet saved to ./checkpoints_pytorch/ mario_net_3280000.chkpt at step 3280000
Episode 40000 - Step 3282723 - Epsilon 0.4401318874850736 - Mean Reward 725.48 - Mean Length 159.28 - Mean Loss 1.517 - Mean Q Value 42.762 - Time Delta 32.361Time 2023-05-30T02:13:44




Episode 40000 - Step 3285647 - Epsilon 0.43981026860109906 - Mean Reward 708.41 - Mean Length 145.51 - Mean Loss 1.521 - Mean Q Value 43.061 - Time Delta 31.093Time 2023-05-30T02:14:15




Episode 40000 - Step 3288999 - Epsilon 0.43944186193386026 - Mean Reward 715.6 - Mean Length 152.14 - Mean Loss 1.511 - Mean Q Value 43.093 - Time Delta 35.804Time 2023-05-30T02:14:51




MarioNet saved to ./checkpoints_pytorch/ mario_net_3290000.chkpt at step 3290000
Episode 40000 - Step 3293930 - Epsilon 0.4389004736775612 - Mean Reward 709.41 - Mean Length 172.85 - Mean Loss 1.552 - Mean Q Value 43.29 - Time Delta 51.183Time 2023-05-30T02:15:42




Episode 40000 - Step 3298104 - Epsilon 0.4384427198505915 - Mean Reward 705.97 - Mean Length 184.26 - Mean Loss 1.59 - Mean Q Value 43.139 - Time Delta 44.491Time 2023-05-30T02:16:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_3300000.chkpt at step 3300000
Episode 40000 - Step 3302524 - Epsilon 0.43795850816078974 - Mean Reward 727.82 - Mean Length 198.01 - Mean Loss 1.621 - Mean Q Value 42.704 - Time Delta 47.311Time 2023-05-30T02:17:14




Episode 40000 - Step 3307636 - Epsilon 0.4373991546204075 - Mean Reward 745.03 - Mean Length 219.89 - Mean Loss 1.681 - Mean Q Value 42.056 - Time Delta 54.254Time 2023-05-30T02:18:08




MarioNet saved to ./checkpoints_pytorch/ mario_net_3310000.chkpt at step 3310000
Episode 40000 - Step 3310711 - Epsilon 0.4370630331914585 - Mean Reward 751.86 - Mean Length 217.12 - Mean Loss 1.736 - Mean Q Value 41.593 - Time Delta 32.842Time 2023-05-30T02:18:41




Episode 40000 - Step 3313743 - Epsilon 0.4367318648994978 - Mean Reward 753.69 - Mean Length 198.13 - Mean Loss 1.785 - Mean Q Value 40.931 - Time Delta 32.052Time 2023-05-30T02:19:13




Episode 40000 - Step 3317239 - Epsilon 0.4363503279577612 - Mean Reward 758.91 - Mean Length 191.35 - Mean Loss 1.813 - Mean Q Value 40.521 - Time Delta 36.989Time 2023-05-30T02:19:50




MarioNet saved to ./checkpoints_pytorch/ mario_net_3320000.chkpt at step 3320000
Episode 40000 - Step 3320039 - Epsilon 0.4360449895708824 - Mean Reward 719.68 - Mean Length 175.15 - Mean Loss 1.825 - Mean Q Value 40.335 - Time Delta 29.843Time 2023-05-30T02:20:20




Episode 40000 - Step 3323988 - Epsilon 0.4356147165295626 - Mean Reward 717.66 - Mean Length 163.52 - Mean Loss 1.803 - Mean Q Value 40.143 - Time Delta 41.879Time 2023-05-30T02:21:02




Episode 40000 - Step 3327637 - Epsilon 0.4352175081589159 - Mean Reward 720.23 - Mean Length 169.26 - Mean Loss 1.755 - Mean Q Value 39.815 - Time Delta 38.122Time 2023-05-30T02:21:40




MarioNet saved to ./checkpoints_pytorch/ mario_net_3330000.chkpt at step 3330000
Episode 40000 - Step 3330120 - Epsilon 0.4349474306908968 - Mean Reward 702.76 - Mean Length 163.77 - Mean Loss 1.695 - Mean Q Value 39.336 - Time Delta 26.605Time 2023-05-30T02:22:07




Episode 40000 - Step 3332622 - Epsilon 0.4346754561078832 - Mean Reward 662.93 - Mean Length 153.83 - Mean Loss 1.675 - Mean Q Value 38.872 - Time Delta 26.484Time 2023-05-30T02:22:33




Episode 40000 - Step 3335983 - Epsilon 0.43431037341204826 - Mean Reward 674.71 - Mean Length 159.44 - Mean Loss 1.657 - Mean Q Value 38.962 - Time Delta 36.099Time 2023-05-30T02:23:09




MarioNet saved to ./checkpoints_pytorch/ mario_net_3340000.chkpt at step 3340000
Episode 40000 - Step 3340296 - Epsilon 0.4338423305722877 - Mean Reward 699.99 - Mean Length 163.08 - Mean Loss 1.665 - Mean Q Value 39.334 - Time Delta 45.871Time 2023-05-30T02:23:55




Episode 40000 - Step 3343909 - Epsilon 0.4334506393623554 - Mean Reward 719.57 - Mean Length 162.72 - Mean Loss 1.681 - Mean Q Value 39.917 - Time Delta 38.784Time 2023-05-30T02:24:34




Episode 40000 - Step 3346857 - Epsilon 0.4331313038907608 - Mean Reward 722.64 - Mean Length 167.37 - Mean Loss 1.687 - Mean Q Value 40.995 - Time Delta 31.606Time 2023-05-30T02:25:05




MarioNet saved to ./checkpoints_pytorch/ mario_net_3350000.chkpt at step 3350000
Episode 40000 - Step 3352057 - Epsilon 0.43256859896273686 - Mean Reward 771.95 - Mean Length 194.35 - Mean Loss 1.698 - Mean Q Value 41.771 - Time Delta 55.228Time 2023-05-30T02:26:01




Episode 40000 - Step 3355417 - Epsilon 0.4322053938616566 - Mean Reward 781.53 - Mean Length 194.34 - Mean Loss 1.705 - Mean Q Value 41.919 - Time Delta 35.641Time 2023-05-30T02:26:36




Episode 40000 - Step 3357635 - Epsilon 0.4319658023736572 - Mean Reward 710.64 - Mean Length 173.39 - Mean Loss 1.695 - Mean Q Value 42.123 - Time Delta 23.519Time 2023-05-30T02:27:00




MarioNet saved to ./checkpoints_pytorch/ mario_net_3360000.chkpt at step 3360000
Episode 40000 - Step 3361469 - Epsilon 0.43155196146523445 - Mean Reward 688.26 - Mean Length 175.6 - Mean Loss 1.707 - Mean Q Value 42.08 - Time Delta 40.146Time 2023-05-30T02:27:40




Episode 40000 - Step 3364517 - Epsilon 0.43122324408668955 - Mean Reward 699.56 - Mean Length 176.6 - Mean Loss 1.722 - Mean Q Value 41.729 - Time Delta 32.7Time 2023-05-30T02:28:13




Episode 40000 - Step 3368109 - Epsilon 0.43083617938312596 - Mean Reward 684.31 - Mean Length 160.52 - Mean Loss 1.703 - Mean Q Value 41.757 - Time Delta 38.417Time 2023-05-30T02:28:51




MarioNet saved to ./checkpoints_pytorch/ mario_net_3370000.chkpt at step 3370000
Episode 40000 - Step 3375248 - Epsilon 0.43006793018613915 - Mean Reward 708.35 - Mean Length 198.31 - Mean Loss 1.715 - Mean Q Value 41.518 - Time Delta 76.242Time 2023-05-30T02:30:07




MarioNet saved to ./checkpoints_pytorch/ mario_net_3380000.chkpt at step 3380000
Episode 40000 - Step 3381053 - Epsilon 0.4294442466942059 - Mean Reward 742.93 - Mean Length 234.18 - Mean Loss 1.714 - Mean Q Value 40.638 - Time Delta 61.311Time 2023-05-30T02:31:09




Episode 40000 - Step 3383597 - Epsilon 0.4291712069549813 - Mean Reward 709.64 - Mean Length 221.28 - Mean Loss 1.702 - Mean Q Value 39.529 - Time Delta 26.861Time 2023-05-30T02:31:36




Episode 40000 - Step 3389037 - Epsilon 0.42858793075824786 - Mean Reward 725.22 - Mean Length 245.2 - Mean Loss 1.686 - Mean Q Value 38.219 - Time Delta 57.999Time 2023-05-30T02:32:34




MarioNet saved to ./checkpoints_pytorch/ mario_net_3390000.chkpt at step 3390000
Episode 40000 - Step 3392421 - Epsilon 0.4282254986538687 - Mean Reward 730.42 - Mean Length 243.12 - Mean Loss 1.699 - Mean Q Value 36.842 - Time Delta 35.801Time 2023-05-30T02:33:09




Episode 40000 - Step 3396548 - Epsilon 0.42778390478678235 - Mean Reward 732.4 - Mean Length 213.0 - Mean Loss 1.682 - Mean Q Value 35.833 - Time Delta 43.253Time 2023-05-30T02:33:53




MarioNet saved to ./checkpoints_pytorch/ mario_net_3400000.chkpt at step 3400000
Episode 40000 - Step 3401611 - Epsilon 0.4272427797783187 - Mean Reward 737.57 - Mean Length 205.58 - Mean Loss 1.682 - Mean Q Value 35.152 - Time Delta 54.318Time 2023-05-30T02:34:47




Episode 40000 - Step 3405145 - Epsilon 0.4268654774330209 - Mean Reward 774.37 - Mean Length 215.48 - Mean Loss 1.718 - Mean Q Value 34.532 - Time Delta 37.456Time 2023-05-30T02:35:24




Episode 40000 - Step 3409054 - Epsilon 0.42644852685868007 - Mean Reward 763.06 - Mean Length 200.17 - Mean Loss 1.77 - Mean Q Value 34.036 - Time Delta 41.707Time 2023-05-30T02:36:06




MarioNet saved to ./checkpoints_pytorch/ mario_net_3410000.chkpt at step 3410000
Episode 40000 - Step 3412648 - Epsilon 0.42606553489427745 - Mean Reward 768.41 - Mean Length 202.27 - Mean Loss 1.783 - Mean Q Value 33.668 - Time Delta 38.547Time 2023-05-30T02:36:45




Episode 40000 - Step 3416697 - Epsilon 0.42563446821306894 - Mean Reward 736.93 - Mean Length 201.49 - Mean Loss 1.804 - Mean Q Value 33.588 - Time Delta 43.037Time 2023-05-30T02:37:28




Episode 40000 - Step 3419921 - Epsilon 0.42529154500539396 - Mean Reward 709.68 - Mean Length 183.1 - Mean Loss 1.823 - Mean Q Value 33.849 - Time Delta 34.194Time 2023-05-30T02:38:02




MarioNet saved to ./checkpoints_pytorch/ mario_net_3420000.chkpt at step 3420000
Episode 40000 - Step 3425427 - Epsilon 0.4247065338467875 - Mean Reward 683.37 - Mean Length 202.82 - Mean Loss 1.768 - Mean Q Value 34.285 - Time Delta 58.144Time 2023-05-30T02:39:00




MarioNet saved to ./checkpoints_pytorch/ mario_net_3430000.chkpt at step 3430000
Episode 40000 - Step 3430216 - Epsilon 0.42419835815292156 - Mean Reward 705.21 - Mean Length 211.62 - Mean Loss 1.684 - Mean Q Value 34.782 - Time Delta 50.139Time 2023-05-30T02:39:50




Episode 40000 - Step 3434438 - Epsilon 0.4237508529423116 - Mean Reward 688.97 - Mean Length 217.9 - Mean Loss 1.601 - Mean Q Value 34.893 - Time Delta 44.934Time 2023-05-30T02:40:35




Episode 40000 - Step 3439607 - Epsilon 0.42320361449471067 - Mean Reward 690.25 - Mean Length 229.1 - Mean Loss 1.485 - Mean Q Value 34.599 - Time Delta 54.305Time 2023-05-30T02:41:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_3440000.chkpt at step 3440000
Episode 40000 - Step 3442271 - Epsilon 0.4229218546886279 - Mean Reward 692.35 - Mean Length 223.5 - Mean Loss 1.384 - Mean Q Value 33.974 - Time Delta 28.585Time 2023-05-30T02:41:58




Episode 40000 - Step 3446033 - Epsilon 0.4225242836216424 - Mean Reward 734.02 - Mean Length 206.06 - Mean Loss 1.337 - Mean Q Value 33.618 - Time Delta 40.202Time 2023-05-30T02:42:38




Episode 40000 - Step 3449736 - Epsilon 0.42213331271573423 - Mean Reward 712.05 - Mean Length 195.2 - Mean Loss 1.273 - Mean Q Value 33.36 - Time Delta 39.317Time 2023-05-30T02:43:17




MarioNet saved to ./checkpoints_pytorch/ mario_net_3450000.chkpt at step 3450000
Episode 40000 - Step 3454689 - Epsilon 0.4216109295631097 - Mean Reward 743.05 - Mean Length 202.51 - Mean Loss 1.268 - Mean Q Value 33.122 - Time Delta 52.773Time 2023-05-30T02:44:10




Episode 40000 - Step 3457804 - Epsilon 0.4212827278207804 - Mean Reward 740.18 - Mean Length 181.97 - Mean Loss 1.276 - Mean Q Value 33.134 - Time Delta 33.098Time 2023-05-30T02:44:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_3460000.chkpt at step 3460000
Episode 40000 - Step 3461068 - Epsilon 0.4209391012905699 - Mean Reward 759.21 - Mean Length 187.97 - Mean Loss 1.264 - Mean Q Value 33.419 - Time Delta 34.533Time 2023-05-30T02:45:18




Episode 40000 - Step 3465981 - Epsilon 0.42042240015878996 - Mean Reward 758.92 - Mean Length 199.48 - Mean Loss 1.263 - Mean Q Value 33.795 - Time Delta 51.896Time 2023-05-30T02:46:10




MarioNet saved to ./checkpoints_pytorch/ mario_net_3470000.chkpt at step 3470000
Episode 40000 - Step 3470107 - Epsilon 0.4199889579850151 - Mean Reward 787.98 - Mean Length 203.71 - Mean Loss 1.278 - Mean Q Value 34.224 - Time Delta 44.133Time 2023-05-30T02:46:54




Episode 40000 - Step 3474020 - Epsilon 0.4195783046290476 - Mean Reward 780.11 - Mean Length 193.31 - Mean Loss 1.281 - Mean Q Value 35.125 - Time Delta 41.495Time 2023-05-30T02:47:35




Episode 40000 - Step 3478305 - Epsilon 0.41912907197719795 - Mean Reward 798.43 - Mean Length 205.01 - Mean Loss 1.302 - Mean Q Value 36.002 - Time Delta 46.031Time 2023-05-30T02:48:21




MarioNet saved to ./checkpoints_pytorch/ mario_net_3480000.chkpt at step 3480000
Episode 40000 - Step 3481964 - Epsilon 0.41874584891402045 - Mean Reward 776.08 - Mean Length 208.96 - Mean Loss 1.329 - Mean Q Value 36.948 - Time Delta 38.851Time 2023-05-30T02:49:00




Episode 40000 - Step 3484624 - Epsilon 0.4184674754590896 - Mean Reward 725.56 - Mean Length 186.43 - Mean Loss 1.361 - Mean Q Value 37.362 - Time Delta 27.974Time 2023-05-30T02:49:28




Episode 40000 - Step 3488488 - Epsilon 0.41806343101212246 - Mean Reward 687.68 - Mean Length 183.81 - Mean Loss 1.401 - Mean Q Value 37.52 - Time Delta 41.322Time 2023-05-30T02:50:10




MarioNet saved to ./checkpoints_pytorch/ mario_net_3490000.chkpt at step 3490000
Episode 40000 - Step 3492136 - Epsilon 0.41768233092340473 - Mean Reward 667.7 - Mean Length 181.16 - Mean Loss 1.429 - Mean Q Value 37.552 - Time Delta 38.873Time 2023-05-30T02:50:48




Episode 40000 - Step 3495800 - Epsilon 0.41729990903638653 - Mean Reward 657.87 - Mean Length 174.95 - Mean Loss 1.454 - Mean Q Value 37.53 - Time Delta 38.528Time 2023-05-30T02:51:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_3500000.chkpt at step 3500000
Episode 40000 - Step 3501756 - Epsilon 0.4166790117668662 - Mean Reward 652.81 - Mean Length 197.92 - Mean Loss 1.46 - Mean Q Value 36.913 - Time Delta 63.591Time 2023-05-30T02:52:31




Episode 40000 - Step 3508755 - Epsilon 0.4159505650605358 - Mean Reward 665.13 - Mean Length 241.31 - Mean Loss 1.418 - Mean Q Value 35.803 - Time Delta 73.894Time 2023-05-30T02:53:44




MarioNet saved to ./checkpoints_pytorch/ mario_net_3510000.chkpt at step 3510000
Episode 40000 - Step 3511798 - Epsilon 0.4156342509618148 - Mean Reward 670.57 - Mean Length 233.1 - Mean Loss 1.388 - Mean Q Value 34.731 - Time Delta 32.67Time 2023-05-30T02:54:17




Episode 40000 - Step 3515232 - Epsilon 0.41527758203477344 - Mean Reward 649.57 - Mean Length 230.96 - Mean Loss 1.338 - Mean Q Value 33.543 - Time Delta 36.76Time 2023-05-30T02:54:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_3520000.chkpt at step 3520000
Episode 40000 - Step 3521767 - Epsilon 0.4145996761154743 - Mean Reward 648.16 - Mean Length 259.67 - Mean Loss 1.293 - Mean Q Value 32.013 - Time Delta 69.072Time 2023-05-30T02:56:03




Episode 40000 - Step 3525933 - Epsilon 0.41416809528352194 - Mean Reward 672.6 - Mean Length 241.77 - Mean Loss 1.254 - Mean Q Value 30.801 - Time Delta 44.048Time 2023-05-30T02:56:47




MarioNet saved to ./checkpoints_pytorch/ mario_net_3530000.chkpt at step 3530000
Episode 40000 - Step 3530642 - Epsilon 0.4136808027212312 - Mean Reward 723.38 - Mean Length 218.87 - Mean Loss 1.239 - Mean Q Value 30.206 - Time Delta 49.407Time 2023-05-30T02:57:36




Episode 40000 - Step 3534352 - Epsilon 0.4132972916094856 - Mean Reward 710.83 - Mean Length 225.54 - Mean Loss 1.244 - Mean Q Value 29.646 - Time Delta 39.269Time 2023-05-30T02:58:16




MarioNet saved to ./checkpoints_pytorch/ mario_net_3540000.chkpt at step 3540000
Episode 40000 - Step 3540218 - Epsilon 0.4126916352616097 - Mean Reward 774.21 - Mean Length 249.86 - Mean Loss 1.242 - Mean Q Value 29.112 - Time Delta 62.565Time 2023-05-30T02:59:18




Episode 40000 - Step 3542557 - Epsilon 0.4124503843403132 - Mean Reward 743.98 - Mean Length 207.9 - Mean Loss 1.243 - Mean Q Value 29.082 - Time Delta 25.016Time 2023-05-30T02:59:43




Episode 40000 - Step 3545720 - Epsilon 0.4121243680737677 - Mean Reward 722.07 - Mean Length 197.87 - Mean Loss 1.231 - Mean Q Value 29.598 - Time Delta 33.937Time 2023-05-30T03:00:17




Episode 40000 - Step 3548156 - Epsilon 0.4118734607112604 - Mean Reward 667.59 - Mean Length 175.14 - Mean Loss 1.237 - Mean Q Value 30.51 - Time Delta 26.084Time 2023-05-30T03:00:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_3550000.chkpt at step 3550000
Episode 40000 - Step 3552007 - Epsilon 0.4114771203063377 - Mean Reward 706.91 - Mean Length 176.55 - Mean Loss 1.219 - Mean Q Value 31.604 - Time Delta 40.8Time 2023-05-30T03:01:24




Episode 40000 - Step 3555726 - Epsilon 0.4110947271984235 - Mean Reward 666.08 - Mean Length 155.08 - Mean Loss 1.216 - Mean Q Value 32.65 - Time Delta 39.579Time 2023-05-30T03:02:04




Episode 40000 - Step 3558108 - Epsilon 0.41084999313455084 - Mean Reward 668.14 - Mean Length 155.51 - Mean Loss 1.199 - Mean Q Value 33.847 - Time Delta 25.13Time 2023-05-30T03:02:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_3560000.chkpt at step 3560000
Episode 40000 - Step 3560634 - Epsilon 0.4105906232360939 - Mean Reward 679.38 - Mean Length 149.14 - Mean Loss 1.217 - Mean Q Value 34.851 - Time Delta 23.424Time 2023-05-30T03:02:52




Episode 40000 - Step 3564121 - Epsilon 0.4102328467839851 - Mean Reward 714.62 - Mean Length 159.65 - Mean Loss 1.219 - Mean Q Value 35.868 - Time Delta 36.202Time 2023-05-30T03:03:28




Episode 40000 - Step 3567553 - Epsilon 0.40988101791358594 - Mean Reward 722.96 - Mean Length 155.46 - Mean Loss 1.224 - Mean Q Value 36.934 - Time Delta 36.648Time 2023-05-30T03:04:05




MarioNet saved to ./checkpoints_pytorch/ mario_net_3570000.chkpt at step 3570000
Episode 40000 - Step 3571555 - Epsilon 0.40947113698099885 - Mean Reward 764.77 - Mean Length 158.29 - Mean Loss 1.24 - Mean Q Value 38.198 - Time Delta 42.61Time 2023-05-30T03:04:48




Episode 40000 - Step 3574454 - Epsilon 0.4091744802513151 - Mean Reward 783.67 - Mean Length 163.46 - Mean Loss 1.263 - Mean Q Value 39.325 - Time Delta 30.745Time 2023-05-30T03:05:18




Episode 40000 - Step 3577768 - Epsilon 0.4088356195444524 - Mean Reward 791.83 - Mean Length 171.34 - Mean Loss 1.273 - Mean Q Value 40.344 - Time Delta 35.366Time 2023-05-30T03:05:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_3580000.chkpt at step 3580000
Episode 40000 - Step 3581282 - Epsilon 0.4084766151235361 - Mean Reward 784.01 - Mean Length 171.61 - Mean Loss 1.297 - Mean Q Value 41.309 - Time Delta 37.765Time 2023-05-30T03:06:32




Episode 40000 - Step 3585795 - Epsilon 0.4080160112119247 - Mean Reward 776.36 - Mean Length 182.42 - Mean Loss 1.331 - Mean Q Value 41.979 - Time Delta 48.311Time 2023-05-30T03:07:20




Episode 40000 - Step 3589899 - Epsilon 0.40759760141299367 - Mean Reward 765.87 - Mean Length 183.44 - Mean Loss 1.363 - Mean Q Value 42.385 - Time Delta 43.984Time 2023-05-30T03:08:04




MarioNet saved to ./checkpoints_pytorch/ mario_net_3590000.chkpt at step 3590000
Episode 40000 - Step 3592645 - Epsilon 0.40731788164941407 - Mean Reward 752.67 - Mean Length 181.91 - Mean Loss 1.426 - Mean Q Value 42.697 - Time Delta 29.047Time 2023-05-30T03:08:33




Episode 40000 - Step 3596113 - Epsilon 0.4069648900458227 - Mean Reward 780.56 - Mean Length 183.45 - Mean Loss 1.471 - Mean Q Value 43.025 - Time Delta 36.9Time 2023-05-30T03:09:10




Episode 40000 - Step 3599241 - Epsilon 0.40664676786398013 - Mean Reward 780.39 - Mean Length 179.59 - Mean Loss 1.501 - Mean Q Value 43.054 - Time Delta 32.84Time 2023-05-30T03:09:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_3600000.chkpt at step 3600000
Episode 40000 - Step 3602902 - Epsilon 0.40627475463166346 - Mean Reward 778.43 - Mean Length 171.07 - Mean Loss 1.554 - Mean Q Value 43.146 - Time Delta 39.19Time 2023-05-30T03:10:22




Episode 40000 - Step 3605850 - Epsilon 0.4059754404108817 - Mean Reward 756.39 - Mean Length 159.51 - Mean Loss 1.599 - Mean Q Value 43.308 - Time Delta 31.478Time 2023-05-30T03:10:53




Episode 40000 - Step 3609711 - Epsilon 0.40558376163239074 - Mean Reward 770.12 - Mean Length 170.66 - Mean Loss 1.595 - Mean Q Value 43.313 - Time Delta 40.612Time 2023-05-30T03:11:34




MarioNet saved to ./checkpoints_pytorch/ mario_net_3610000.chkpt at step 3610000
Episode 40000 - Step 3612537 - Epsilon 0.4052973178671272 - Mean Reward 737.3 - Mean Length 164.24 - Mean Loss 1.637 - Mean Q Value 43.122 - Time Delta 30.437Time 2023-05-30T03:12:04




Episode 40000 - Step 3616101 - Epsilon 0.40493635874290806 - Mean Reward 738.5 - Mean Length 168.6 - Mean Loss 1.714 - Mean Q Value 42.833 - Time Delta 38.131Time 2023-05-30T03:12:42




Episode 40000 - Step 3619335 - Epsilon 0.4046090999680982 - Mean Reward 732.65 - Mean Length 164.33 - Mean Loss 1.718 - Mean Q Value 42.707 - Time Delta 34.552Time 2023-05-30T03:13:17




MarioNet saved to ./checkpoints_pytorch/ mario_net_3620000.chkpt at step 3620000
Episode 40000 - Step 3622946 - Epsilon 0.4042440038774573 - Mean Reward 738.46 - Mean Length 170.96 - Mean Loss 1.726 - Mean Q Value 42.67 - Time Delta 38.49Time 2023-05-30T03:13:56




Episode 40000 - Step 3627014 - Epsilon 0.4038330966558321 - Mean Reward 741.37 - Mean Length 173.03 - Mean Loss 1.741 - Mean Q Value 42.781 - Time Delta 43.11Time 2023-05-30T03:14:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_3630000.chkpt at step 3630000
Episode 40000 - Step 3630464 - Epsilon 0.403484940730223 - Mean Reward 734.1 - Mean Length 179.27 - Mean Loss 1.727 - Mean Q Value 42.708 - Time Delta 36.414Time 2023-05-30T03:15:15




Episode 40000 - Step 3633652 - Epsilon 0.40316349130684326 - Mean Reward 690.2 - Mean Length 175.51 - Mean Loss 1.705 - Mean Q Value 42.629 - Time Delta 33.481Time 2023-05-30T03:15:49




Episode 40000 - Step 3638367 - Epsilon 0.40268854226011835 - Mean Reward 668.24 - Mean Length 190.32 - Mean Loss 1.727 - Mean Q Value 42.118 - Time Delta 50.532Time 2023-05-30T03:16:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_3640000.chkpt at step 3640000
Episode 40000 - Step 3641538 - Epsilon 0.4023694373802173 - Mean Reward 641.68 - Mean Length 185.92 - Mean Loss 1.757 - Mean Q Value 41.242 - Time Delta 33.933Time 2023-05-30T03:17:13




Episode 40000 - Step 3646813 - Epsilon 0.40183916234526684 - Mean Reward 656.84 - Mean Length 197.99 - Mean Loss 1.797 - Mean Q Value 40.071 - Time Delta 55.76Time 2023-05-30T03:18:09




MarioNet saved to ./checkpoints_pytorch/ mario_net_3650000.chkpt at step 3650000
Episode 40000 - Step 3650306 - Epsilon 0.40148840942263986 - Mean Reward 687.52 - Mean Length 198.42 - Mean Loss 1.834 - Mean Q Value 38.986 - Time Delta 37.421Time 2023-05-30T03:18:46




Episode 40000 - Step 3654177 - Epsilon 0.40110005691020184 - Mean Reward 737.7 - Mean Length 205.25 - Mean Loss 1.854 - Mean Q Value 38.029 - Time Delta 41.028Time 2023-05-30T03:19:27




Episode 40000 - Step 3657297 - Epsilon 0.40078732080961027 - Mean Reward 753.28 - Mean Length 189.3 - Mean Loss 1.848 - Mean Q Value 37.41 - Time Delta 33.568Time 2023-05-30T03:20:01




MarioNet saved to ./checkpoints_pytorch/ mario_net_3660000.chkpt at step 3660000
Episode 40000 - Step 3661360 - Epsilon 0.40038042772338767 - Mean Reward 761.48 - Mean Length 198.22 - Mean Loss 1.83 - Mean Q Value 36.895 - Time Delta 43.194Time 2023-05-30T03:20:44




Episode 40000 - Step 3665257 - Epsilon 0.39999054699437797 - Mean Reward 746.08 - Mean Length 184.44 - Mean Loss 1.822 - Mean Q Value 36.526 - Time Delta 41.047Time 2023-05-30T03:21:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_3670000.chkpt at step 3670000
Episode 40000 - Step 3671489 - Epsilon 0.3993678468536042 - Mean Reward 738.26 - Mean Length 211.83 - Mean Loss 1.784 - Mean Q Value 36.174 - Time Delta 66.457Time 2023-05-30T03:22:31




Episode 40000 - Step 3675981 - Epsilon 0.39891960843847235 - Mean Reward 742.47 - Mean Length 218.04 - Mean Loss 1.716 - Mean Q Value 35.529 - Time Delta 48.093Time 2023-05-30T03:23:20




Episode 40000 - Step 3679476 - Epsilon 0.3985712046180855 - Mean Reward 744.75 - Mean Length 221.79 - Mean Loss 1.666 - Mean Q Value 34.975 - Time Delta 37.329Time 2023-05-30T03:23:57




MarioNet saved to ./checkpoints_pytorch/ mario_net_3680000.chkpt at step 3680000
Episode 40000 - Step 3682803 - Epsilon 0.3982398308063647 - Mean Reward 749.96 - Mean Length 214.43 - Mean Loss 1.59 - Mean Q Value 34.703 - Time Delta 35.776Time 2023-05-30T03:24:33




Episode 40000 - Step 3685648 - Epsilon 0.39795668339750356 - Mean Reward 736.06 - Mean Length 203.91 - Mean Loss 1.479 - Mean Q Value 34.811 - Time Delta 30.369Time 2023-05-30T03:25:03




Episode 40000 - Step 3689320 - Epsilon 0.3975915267489918 - Mean Reward 749.49 - Mean Length 178.31 - Mean Loss 1.394 - Mean Q Value 35.159 - Time Delta 38.916Time 2023-05-30T03:25:42




MarioNet saved to ./checkpoints_pytorch/ mario_net_3690000.chkpt at step 3690000
Episode 40000 - Step 3692902 - Epsilon 0.39723564286304464 - Mean Reward 744.79 - Mean Length 169.21 - Mean Loss 1.339 - Mean Q Value 35.74 - Time Delta 38.1Time 2023-05-30T03:26:20




Episode 40000 - Step 3695692 - Epsilon 0.3969586675738501 - Mean Reward 730.01 - Mean Length 162.16 - Mean Loss 1.271 - Mean Q Value 36.258 - Time Delta 29.736Time 2023-05-30T03:26:50




Episode 40000 - Step 3698452 - Epsilon 0.3966848605332524 - Mean Reward 707.45 - Mean Length 156.49 - Mean Loss 1.212 - Mean Q Value 36.681 - Time Delta 29.01Time 2023-05-30T03:27:19




MarioNet saved to ./checkpoints_pytorch/ mario_net_3700000.chkpt at step 3700000
Episode 40000 - Step 3701928 - Epsilon 0.3963402910834382 - Mean Reward 715.0 - Mean Length 162.8 - Mean Loss 1.224 - Mean Q Value 36.924 - Time Delta 36.864Time 2023-05-30T03:27:56




Episode 40000 - Step 3705730 - Epsilon 0.3959637485698229 - Mean Reward 711.68 - Mean Length 164.1 - Mean Loss 1.257 - Mean Q Value 37.047 - Time Delta 40.725Time 2023-05-30T03:28:36




Episode 40000 - Step 3709666 - Epsilon 0.3955743118267971 - Mean Reward 708.08 - Mean Length 167.64 - Mean Loss 1.278 - Mean Q Value 37.428 - Time Delta 42.027Time 2023-05-30T03:29:18




MarioNet saved to ./checkpoints_pytorch/ mario_net_3710000.chkpt at step 3710000
Episode 40000 - Step 3714190 - Epsilon 0.3951271701304579 - Mean Reward 717.94 - Mean Length 184.98 - Mean Loss 1.34 - Mean Q Value 37.918 - Time Delta 48.386Time 2023-05-30T03:30:07




Episode 40000 - Step 3717957 - Epsilon 0.39475523423424863 - Mean Reward 738.4 - Mean Length 195.05 - Mean Loss 1.41 - Mean Q Value 38.539 - Time Delta 40.439Time 2023-05-30T03:30:47




MarioNet saved to ./checkpoints_pytorch/ mario_net_3720000.chkpt at step 3720000
Episode 40000 - Step 3720902 - Epsilon 0.3944647026217595 - Mean Reward 728.37 - Mean Length 189.74 - Mean Loss 1.451 - Mean Q Value 38.959 - Time Delta 31.626Time 2023-05-30T03:31:19




Episode 40000 - Step 3723873 - Epsilon 0.394171822708979 - Mean Reward 730.02 - Mean Length 181.43 - Mean Loss 1.488 - Mean Q Value 39.501 - Time Delta 31.812Time 2023-05-30T03:31:51




Episode 40000 - Step 3726686 - Epsilon 0.3938947187880692 - Mean Reward 722.4 - Mean Length 170.2 - Mean Loss 1.512 - Mean Q Value 39.913 - Time Delta 30.02Time 2023-05-30T03:32:21




Episode 40000 - Step 3729819 - Epsilon 0.39358632150283895 - Mean Reward 734.74 - Mean Length 156.29 - Mean Loss 1.505 - Mean Q Value 40.288 - Time Delta 33.286Time 2023-05-30T03:32:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_3730000.chkpt at step 3730000
Episode 40000 - Step 3733912 - Epsilon 0.3931837902291749 - Mean Reward 764.52 - Mean Length 159.55 - Mean Loss 1.512 - Mean Q Value 40.512 - Time Delta 42.771Time 2023-05-30T03:33:37




Episode 40000 - Step 3737572 - Epsilon 0.3928241915575964 - Mean Reward 801.73 - Mean Length 166.7 - Mean Loss 1.504 - Mean Q Value 40.796 - Time Delta 39.198Time 2023-05-30T03:34:16




MarioNet saved to ./checkpoints_pytorch/ mario_net_3740000.chkpt at step 3740000
Episode 40000 - Step 3741006 - Epsilon 0.3924870966656478 - Mean Reward 779.95 - Mean Length 171.33 - Mean Loss 1.479 - Mean Q Value 40.901 - Time Delta 36.863Time 2023-05-30T03:34:53




Episode 40000 - Step 3743748 - Epsilon 0.3922181389230242 - Mean Reward 773.86 - Mean Length 170.62 - Mean Loss 1.488 - Mean Q Value 41.036 - Time Delta 29.143Time 2023-05-30T03:35:22




Episode 40000 - Step 3747885 - Epsilon 0.3918126969624209 - Mean Reward 742.33 - Mean Length 180.66 - Mean Loss 1.478 - Mean Q Value 41.118 - Time Delta 44.093Time 2023-05-30T03:36:06




MarioNet saved to ./checkpoints_pytorch/ mario_net_3750000.chkpt at step 3750000
Episode 40000 - Step 3750857 - Epsilon 0.39152168821529904 - Mean Reward 712.74 - Mean Length 169.45 - Mean Loss 1.481 - Mean Q Value 41.152 - Time Delta 32.061Time 2023-05-30T03:36:38




Episode 40000 - Step 3755267 - Epsilon 0.3910902733611913 - Mean Reward 718.5 - Mean Length 176.95 - Mean Loss 1.511 - Mean Q Value 41.335 - Time Delta 47.456Time 2023-05-30T03:37:26




Episode 40000 - Step 3758908 - Epsilon 0.3907344453661221 - Mean Reward 755.2 - Mean Length 179.02 - Mean Loss 1.534 - Mean Q Value 41.656 - Time Delta 39.019Time 2023-05-30T03:38:05




MarioNet saved to ./checkpoints_pytorch/ mario_net_3760000.chkpt at step 3760000
Episode 40000 - Step 3761741 - Epsilon 0.39045780563718163 - Mean Reward 754.94 - Mean Length 179.93 - Mean Loss 1.573 - Mean Q Value 41.934 - Time Delta 30.286Time 2023-05-30T03:38:35




Episode 40000 - Step 3764298 - Epsilon 0.3902082852151644 - Mean Reward 752.72 - Mean Length 164.13 - Mean Loss 1.62 - Mean Q Value 42.244 - Time Delta 27.207Time 2023-05-30T03:39:02




Episode 40000 - Step 3766682 - Epsilon 0.38997579033843754 - Mean Reward 727.54 - Mean Length 158.25 - Mean Loss 1.617 - Mean Q Value 42.526 - Time Delta 24.763Time 2023-05-30T03:39:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_3770000.chkpt at step 3770000
Episode 40000 - Step 3770850 - Epsilon 0.3895696471514258 - Mean Reward 709.28 - Mean Length 155.83 - Mean Loss 1.62 - Mean Q Value 42.527 - Time Delta 44.182Time 2023-05-30T03:40:11




Episode 40000 - Step 3773133 - Epsilon 0.3893473636876751 - Mean Reward 648.8 - Mean Length 142.25 - Mean Loss 1.652 - Mean Q Value 42.377 - Time Delta 24.525Time 2023-05-30T03:40:36




Episode 40000 - Step 3775694 - Epsilon 0.38909816379051954 - Mean Reward 631.71 - Mean Length 139.53 - Mean Loss 1.632 - Mean Q Value 42.376 - Time Delta 27.417Time 2023-05-30T03:41:03




MarioNet saved to ./checkpoints_pytorch/ mario_net_3780000.chkpt at step 3780000
Episode 40000 - Step 3780142 - Epsilon 0.3886657270576539 - Mean Reward 658.56 - Mean Length 158.44 - Mean Loss 1.63 - Mean Q Value 42.132 - Time Delta 47.446Time 2023-05-30T03:41:50




Episode 40000 - Step 3785608 - Epsilon 0.38813497799210006 - Mean Reward 707.06 - Mean Length 189.26 - Mean Loss 1.664 - Mean Q Value 41.573 - Time Delta 58.27Time 2023-05-30T03:42:49




Episode 40000 - Step 3788749 - Epsilon 0.3878303145966263 - Mean Reward 665.36 - Mean Length 178.99 - Mean Loss 1.671 - Mean Q Value 41.117 - Time Delta 33.459Time 2023-05-30T03:43:22




MarioNet saved to ./checkpoints_pytorch/ mario_net_3790000.chkpt at step 3790000
Episode 40000 - Step 3794827 - Epsilon 0.387241453860152 - Mean Reward 696.36 - Mean Length 216.94 - Mean Loss 1.663 - Mean Q Value 40.502 - Time Delta 64.721Time 2023-05-30T03:44:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_3800000.chkpt at step 3800000
Episode 40000 - Step 3800048 - Epsilon 0.3867363366131472 - Mean Reward 708.05 - Mean Length 243.54 - Mean Loss 1.663 - Mean Q Value 39.28 - Time Delta 55.393Time 2023-05-30T03:45:22




Episode 40000 - Step 3803836 - Epsilon 0.3863702706161724 - Mean Reward 716.93 - Mean Length 236.94 - Mean Loss 1.682 - Mean Q Value 37.857 - Time Delta 40.354Time 2023-05-30T03:46:03




Episode 40000 - Step 3808457 - Epsilon 0.38592417403137264 - Mean Reward 713.74 - Mean Length 228.49 - Mean Loss 1.674 - Mean Q Value 36.408 - Time Delta 49.302Time 2023-05-30T03:46:52




MarioNet saved to ./checkpoints_pytorch/ mario_net_3810000.chkpt at step 3810000
Episode 40000 - Step 3812166 - Epsilon 0.38556649165236256 - Mean Reward 752.98 - Mean Length 234.17 - Mean Loss 1.671 - Mean Q Value 35.123 - Time Delta 39.251Time 2023-05-30T03:47:31




Episode 40000 - Step 3814917 - Epsilon 0.38530140943015545 - Mean Reward 740.54 - Mean Length 200.9 - Mean Loss 1.671 - Mean Q Value 33.929 - Time Delta 29.517Time 2023-05-30T03:48:01




Episode 40000 - Step 3818709 - Epsilon 0.38493631672960965 - Mean Reward 767.9 - Mean Length 186.61 - Mean Loss 1.665 - Mean Q Value 33.236 - Time Delta 40.812Time 2023-05-30T03:48:41




MarioNet saved to ./checkpoints_pytorch/ mario_net_3820000.chkpt at step 3820000
Episode 40000 - Step 3822441 - Epsilon 0.3845773385905213 - Mean Reward 750.15 - Mean Length 186.05 - Mean Loss 1.617 - Mean Q Value 32.885 - Time Delta 39.436Time 2023-05-30T03:49:21




Episode 40000 - Step 3825963 - Epsilon 0.3842388672354179 - Mean Reward 728.09 - Mean Length 175.06 - Mean Loss 1.581 - Mean Q Value 33.211 - Time Delta 37.536Time 2023-05-30T03:49:58




MarioNet saved to ./checkpoints_pytorch/ mario_net_3830000.chkpt at step 3830000
Episode 40000 - Step 3831238 - Epsilon 0.3837324861343086 - Mean Reward 739.72 - Mean Length 190.72 - Mean Loss 1.536 - Mean Q Value 33.439 - Time Delta 56.125Time 2023-05-30T03:50:55




Episode 40000 - Step 3835346 - Epsilon 0.3833385951194133 - Mean Reward 781.77 - Mean Length 204.29 - Mean Loss 1.484 - Mean Q Value 33.999 - Time Delta 43.158Time 2023-05-30T03:51:38




MarioNet saved to ./checkpoints_pytorch/ mario_net_3840000.chkpt at step 3840000
Episode 40000 - Step 3840308 - Epsilon 0.3828633583592397 - Mean Reward 796.41 - Mean Length 215.99 - Mean Loss 1.437 - Mean Q Value 34.654 - Time Delta 53.074Time 2023-05-30T03:52:31




Episode 40000 - Step 3843470 - Epsilon 0.38256082442888933 - Mean Reward 788.02 - Mean Length 210.29 - Mean Loss 1.394 - Mean Q Value 35.501 - Time Delta 33.765Time 2023-05-30T03:53:05




Episode 40000 - Step 3846331 - Epsilon 0.3822872955973968 - Mean Reward 791.76 - Mean Length 203.68 - Mean Loss 1.365 - Mean Q Value 36.27 - Time Delta 30.652Time 2023-05-30T03:53:35




Episode 40000 - Step 3849621 - Epsilon 0.3819729935318794 - Mean Reward 766.89 - Mean Length 183.83 - Mean Loss 1.333 - Mean Q Value 37.164 - Time Delta 35.322Time 2023-05-30T03:54:11




MarioNet saved to ./checkpoints_pytorch/ mario_net_3850000.chkpt at step 3850000
Episode 40000 - Step 3854221 - Epsilon 0.3815339770172587 - Mean Reward 737.76 - Mean Length 188.75 - Mean Loss 1.325 - Mean Q Value 37.594 - Time Delta 49.262Time 2023-05-30T03:55:00




MarioNet saved to ./checkpoints_pytorch/ mario_net_3860000.chkpt at step 3860000
Episode 40000 - Step 3861044 - Epsilon 0.3808837300916009 - Mean Reward 696.72 - Mean Length 207.36 - Mean Loss 1.307 - Mean Q Value 37.459 - Time Delta 72.665Time 2023-05-30T03:56:12




Episode 40000 - Step 3864488 - Epsilon 0.3805559302970339 - Mean Reward 709.81 - Mean Length 210.18 - Mean Loss 1.299 - Mean Q Value 36.817 - Time Delta 36.839Time 2023-05-30T03:56:49




MarioNet saved to ./checkpoints_pytorch/ mario_net_3870000.chkpt at step 3870000
Episode 40000 - Step 3870914 - Epsilon 0.3799450579331712 - Mean Reward 757.51 - Mean Length 245.83 - Mean Loss 1.27 - Mean Q Value 35.842 - Time Delta 68.113Time 2023-05-30T03:57:57




Episode 40000 - Step 3876661 - Epsilon 0.3793995637661311 - Mean Reward 764.01 - Mean Length 270.4 - Mean Loss 1.241 - Mean Q Value 34.452 - Time Delta 61.62Time 2023-05-30T03:58:59




MarioNet saved to ./checkpoints_pytorch/ mario_net_3880000.chkpt at step 3880000
Episode 40000 - Step 3880300 - Epsilon 0.37905456192632975 - Mean Reward 765.23 - Mean Length 260.79 - Mean Loss 1.168 - Mean Q Value 33.321 - Time Delta 39.231Time 2023-05-30T03:59:38




Episode 40000 - Step 3883814 - Epsilon 0.3787217086792186 - Mean Reward 774.64 - Mean Length 227.7 - Mean Loss 1.132 - Mean Q Value 32.546 - Time Delta 37.893Time 2023-05-30T04:00:16




Episode 40000 - Step 3886589 - Epsilon 0.3784590615772027 - Mean Reward 776.99 - Mean Length 221.01 - Mean Loss 1.089 - Mean Q Value 32.103 - Time Delta 29.63Time 2023-05-30T04:00:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_3890000.chkpt at step 3890000
Episode 40000 - Step 3890101 - Epsilon 0.3781269203109283 - Mean Reward 714.65 - Mean Length 191.87 - Mean Loss 1.065 - Mean Q Value 31.796 - Time Delta 37.124Time 2023-05-30T04:01:23




Episode 40000 - Step 3893788 - Epsilon 0.3777785423618891 - Mean Reward 745.93 - Mean Length 171.27 - Mean Loss 1.064 - Mean Q Value 31.875 - Time Delta 39.376Time 2023-05-30T04:02:02




Episode 40000 - Step 3896951 - Epsilon 0.37747993202077096 - Mean Reward 753.16 - Mean Length 166.51 - Mean Loss 1.079 - Mean Q Value 32.296 - Time Delta 33.775Time 2023-05-30T04:02:36




MarioNet saved to ./checkpoints_pytorch/ mario_net_3900000.chkpt at step 3900000
Episode 40000 - Step 3900074 - Epsilon 0.37718532954738654 - Mean Reward 754.51 - Mean Length 162.6 - Mean Loss 1.093 - Mean Q Value 33.052 - Time Delta 33.479Time 2023-05-30T04:03:10




Episode 40000 - Step 3904057 - Epsilon 0.3768099341395208 - Mean Reward 743.16 - Mean Length 174.68 - Mean Loss 1.146 - Mean Q Value 34.171 - Time Delta 42.589Time 2023-05-30T04:03:52




Episode 40000 - Step 3908391 - Epsilon 0.3764018816271854 - Mean Reward 784.65 - Mean Length 182.9 - Mean Loss 1.195 - Mean Q Value 35.252 - Time Delta 46.267Time 2023-05-30T04:04:38




MarioNet saved to ./checkpoints_pytorch/ mario_net_3910000.chkpt at step 3910000
Episode 40000 - Step 3912198 - Epsilon 0.3760438115151815 - Mean Reward 763.96 - Mean Length 184.1 - Mean Loss 1.229 - Mean Q Value 36.381 - Time Delta 40.574Time 2023-05-30T04:05:19




Episode 40000 - Step 3915921 - Epsilon 0.3756939715258768 - Mean Reward 755.32 - Mean Length 189.7 - Mean Loss 1.282 - Mean Q Value 37.61 - Time Delta 39.712Time 2023-05-30T04:05:59




Episode 40000 - Step 3919131 - Epsilon 0.3753925980183077 - Mean Reward 760.73 - Mean Length 190.57 - Mean Loss 1.329 - Mean Q Value 38.677 - Time Delta 34.567Time 2023-05-30T04:06:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_3920000.chkpt at step 3920000
Episode 40000 - Step 3921355 - Epsilon 0.3751839377206358 - Mean Reward 738.74 - Mean Length 172.98 - Mean Loss 1.37 - Mean Q Value 39.534 - Time Delta 23.696Time 2023-05-30T04:06:57




Episode 40000 - Step 3923387 - Epsilon 0.3749933926589724 - Mean Reward 671.49 - Mean Length 149.96 - Mean Loss 1.416 - Mean Q Value 40.552 - Time Delta 21.528Time 2023-05-30T04:07:18




Episode 40000 - Step 3926465 - Epsilon 0.3747049462012331 - Mean Reward 684.4 - Mean Length 142.67 - Mean Loss 1.444 - Mean Q Value 41.492 - Time Delta 32.894Time 2023-05-30T04:07:51




Episode 40000 - Step 3929537 - Epsilon 0.3744172832432669 - Mean Reward 673.54 - Mean Length 136.16 - Mean Loss 1.465 - Mean Q Value 41.976 - Time Delta 32.736Time 2023-05-30T04:08:24




MarioNet saved to ./checkpoints_pytorch/ mario_net_3930000.chkpt at step 3930000
Episode 40000 - Step 3933289 - Epsilon 0.37406624445058917 - Mean Reward 676.23 - Mean Length 141.58 - Mean Loss 1.487 - Mean Q Value 42.262 - Time Delta 39.897Time 2023-05-30T04:09:04




Episode 40000 - Step 3935954 - Epsilon 0.37381710578758054 - Mean Reward 701.35 - Mean Length 145.99 - Mean Loss 1.469 - Mean Q Value 42.413 - Time Delta 27.801Time 2023-05-30T04:09:32




Episode 40000 - Step 3938744 - Epsilon 0.3735564592347285 - Mean Reward 728.81 - Mean Length 153.57 - Mean Loss 1.46 - Mean Q Value 42.439 - Time Delta 29.873Time 2023-05-30T04:10:02




MarioNet saved to ./checkpoints_pytorch/ mario_net_3940000.chkpt at step 3940000
Episode 40000 - Step 3943075 - Epsilon 0.37315220981792546 - Mean Reward 683.89 - Mean Length 166.1 - Mean Loss 1.488 - Mean Q Value 42.432 - Time Delta 46.735Time 2023-05-30T04:10:48




Episode 40000 - Step 3946418 - Epsilon 0.3728404781025968 - Mean Reward 696.93 - Mean Length 168.81 - Mean Loss 1.5 - Mean Q Value 42.329 - Time Delta 35.543Time 2023-05-30T04:11:24




Episode 40000 - Step 3949614 - Epsilon 0.37254269750252134 - Mean Reward 657.82 - Mean Length 163.25 - Mean Loss 1.483 - Mean Q Value 42.229 - Time Delta 34.253Time 2023-05-30T04:11:58




MarioNet saved to ./checkpoints_pytorch/ mario_net_3950000.chkpt at step 3950000
Episode 40000 - Step 3953301 - Epsilon 0.3721994644399855 - Mean Reward 681.58 - Mean Length 173.47 - Mean Loss 1.51 - Mean Q Value 42.18 - Time Delta 39.933Time 2023-05-30T04:12:38




Episode 40000 - Step 3956918 - Epsilon 0.3718630551541381 - Mean Reward 698.16 - Mean Length 181.74 - Mean Loss 1.532 - Mean Q Value 42.24 - Time Delta 38.515Time 2023-05-30T04:13:17




Episode 40000 - Step 3959938 - Epsilon 0.37158240447134083 - Mean Reward 705.96 - Mean Length 168.63 - Mean Loss 1.535 - Mean Q Value 42.051 - Time Delta 32.126Time 2023-05-30T04:13:49




MarioNet saved to ./checkpoints_pytorch/ mario_net_3960000.chkpt at step 3960000
Episode 40000 - Step 3963343 - Epsilon 0.37126622950103255 - Mean Reward 706.16 - Mean Length 169.25 - Mean Loss 1.557 - Mean Q Value 41.916 - Time Delta 36.398Time 2023-05-30T04:14:25




Episode 40000 - Step 3966730 - Epsilon 0.3709519928405992 - Mean Reward 750.56 - Mean Length 171.16 - Mean Loss 1.589 - Mean Q Value 41.697 - Time Delta 35.905Time 2023-05-30T04:15:01




Episode 40000 - Step 3969756 - Epsilon 0.37067147374263787 - Mean Reward 742.33 - Mean Length 164.55 - Mean Loss 1.579 - Mean Q Value 41.435 - Time Delta 31.731Time 2023-05-30T04:15:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_3970000.chkpt at step 3970000
Episode 40000 - Step 3972398 - Epsilon 0.3704267260404297 - Mean Reward 728.23 - Mean Length 154.8 - Mean Loss 1.598 - Mean Q Value 41.187 - Time Delta 28.205Time 2023-05-30T04:16:01




Episode 40000 - Step 3977074 - Epsilon 0.3699939501503248 - Mean Reward 754.03 - Mean Length 171.36 - Mean Loss 1.591 - Mean Q Value 41.011 - Time Delta 49.917Time 2023-05-30T04:16:51




MarioNet saved to ./checkpoints_pytorch/ mario_net_3980000.chkpt at step 3980000
Episode 40000 - Step 3982252 - Epsilon 0.36951530279329997 - Mean Reward 751.14 - Mean Length 189.09 - Mean Loss 1.571 - Mean Q Value 40.519 - Time Delta 55.604Time 2023-05-30T04:17:47




Episode 40000 - Step 3986022 - Epsilon 0.36916719864670056 - Mean Reward 755.04 - Mean Length 192.92 - Mean Loss 1.581 - Mean Q Value 40.197 - Time Delta 40.53Time 2023-05-30T04:18:27




Episode 40000 - Step 3989222 - Epsilon 0.36887198295286266 - Mean Reward 742.56 - Mean Length 194.66 - Mean Loss 1.596 - Mean Q Value 40.008 - Time Delta 34.37Time 2023-05-30T04:19:01




MarioNet saved to ./checkpoints_pytorch/ mario_net_3990000.chkpt at step 3990000
Episode 40000 - Step 3992366 - Epsilon 0.368582163451979 - Mean Reward 756.88 - Mean Length 199.68 - Mean Loss 1.614 - Mean Q Value 39.74 - Time Delta 33.286Time 2023-05-30T04:19:35




Episode 40000 - Step 3995301 - Epsilon 0.36831181545176717 - Mean Reward 734.74 - Mean Length 182.27 - Mean Loss 1.652 - Mean Q Value 39.669 - Time Delta 31.615Time 2023-05-30T04:20:06




Episode 40000 - Step 3998348 - Epsilon 0.36803136072300685 - Mean Reward 732.79 - Mean Length 160.96 - Mean Loss 1.686 - Mean Q Value 40.059 - Time Delta 32.37Time 2023-05-30T04:20:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_4000000.chkpt at step 4000000
Episode 40000 - Step 4002163 - Epsilon 0.3676805181034803 - Mean Reward 726.5 - Mean Length 161.41 - Mean Loss 1.7 - Mean Q Value 40.353 - Time Delta 40.099Time 2023-05-30T04:21:19




Episode 40000 - Step 4005474 - Epsilon 0.36737629644365966 - Mean Reward 715.68 - Mean Length 162.52 - Mean Loss 1.716 - Mean Q Value 40.397 - Time Delta 35.284Time 2023-05-30T04:21:54




Episode 40000 - Step 4008929 - Epsilon 0.3670591121316081 - Mean Reward 702.73 - Mean Length 165.63 - Mean Loss 1.678 - Mean Q Value 40.37 - Time Delta 36.848Time 2023-05-30T04:22:31




MarioNet saved to ./checkpoints_pytorch/ mario_net_4010000.chkpt at step 4010000
Episode 40000 - Step 4011680 - Epsilon 0.3668067539852655 - Mean Reward 696.37 - Mean Length 163.79 - Mean Loss 1.677 - Mean Q Value 40.109 - Time Delta 29.637Time 2023-05-30T04:23:01




Episode 40000 - Step 4014377 - Epsilon 0.3665595178592974 - Mean Reward 698.81 - Mean Length 160.29 - Mean Loss 1.657 - Mean Q Value 39.882 - Time Delta 28.477Time 2023-05-30T04:23:29




Episode 40000 - Step 4016882 - Epsilon 0.3663300317978442 - Mean Reward 657.15 - Mean Length 147.19 - Mean Loss 1.626 - Mean Q Value 39.8 - Time Delta 26.941Time 2023-05-30T04:23:56




MarioNet saved to ./checkpoints_pytorch/ mario_net_4020000.chkpt at step 4020000
Episode 40000 - Step 4021313 - Epsilon 0.365924454335059 - Mean Reward 689.37 - Mean Length 158.39 - Mean Loss 1.623 - Mean Q Value 39.948 - Time Delta 47.771Time 2023-05-30T04:24:44




Episode 40000 - Step 4025040 - Epsilon 0.3655836629729168 - Mean Reward 713.49 - Mean Length 161.11 - Mean Loss 1.623 - Mean Q Value 40.275 - Time Delta 39.606Time 2023-05-30T04:25:23




Episode 40000 - Step 4028214 - Epsilon 0.365293687363098 - Mean Reward 738.86 - Mean Length 165.34 - Mean Loss 1.592 - Mean Q Value 40.72 - Time Delta 33.898Time 2023-05-30T04:25:57




MarioNet saved to ./checkpoints_pytorch/ mario_net_4030000.chkpt at step 4030000
Episode 40000 - Step 4032223 - Epsilon 0.3649277551275679 - Mean Reward 785.51 - Mean Length 178.46 - Mean Loss 1.611 - Mean Q Value 41.218 - Time Delta 40.664Time 2023-05-30T04:26:38




Episode 40000 - Step 4034868 - Epsilon 0.36468652638434546 - Mean Reward 814.87 - Mean Length 179.86 - Mean Loss 1.633 - Mean Q Value 41.533 - Time Delta 28.201Time 2023-05-30T04:27:06




Episode 40000 - Step 4037496 - Epsilon 0.3644470059976006 - Mean Reward 779.01 - Mean Length 161.83 - Mean Loss 1.621 - Mean Q Value 41.732 - Time Delta 27.538Time 2023-05-30T04:27:34




MarioNet saved to ./checkpoints_pytorch/ mario_net_4040000.chkpt at step 4040000
Episode 40000 - Step 4040519 - Epsilon 0.3641716791905853 - Mean Reward 769.32 - Mean Length 154.79 - Mean Loss 1.635 - Mean Q Value 41.926 - Time Delta 32.226Time 2023-05-30T04:28:06




Episode 40000 - Step 4044208 - Epsilon 0.3638359766420738 - Mean Reward 799.16 - Mean Length 159.94 - Mean Loss 1.647 - Mean Q Value 42.435 - Time Delta 39.562Time 2023-05-30T04:28:45




Episode 40000 - Step 4047871 - Epsilon 0.3635029463143237 - Mean Reward 763.06 - Mean Length 156.48 - Mean Loss 1.62 - Mean Q Value 42.8 - Time Delta 38.97Time 2023-05-30T04:29:24




MarioNet saved to ./checkpoints_pytorch/ mario_net_4050000.chkpt at step 4050000
Episode 40000 - Step 4051073 - Epsilon 0.36321207860473265 - Mean Reward 771.86 - Mean Length 162.05 - Mean Loss 1.63 - Mean Q Value 43.379 - Time Delta 34.486Time 2023-05-30T04:29:59




Episode 40000 - Step 4053544 - Epsilon 0.3629877736044455 - Mean Reward 766.43 - Mean Length 160.48 - Mean Loss 1.681 - Mean Q Value 43.924 - Time Delta 26.835Time 2023-05-30T04:30:26




Episode 40000 - Step 4056284 - Epsilon 0.36273921209045956 - Mean Reward 757.77 - Mean Length 157.65 - Mean Loss 1.705 - Mean Q Value 44.287 - Time Delta 29.284Time 2023-05-30T04:30:55




Episode 40000 - Step 4059410 - Epsilon 0.3624558421019948 - Mean Reward 723.65 - Mean Length 152.02 - Mean Loss 1.707 - Mean Q Value 44.506 - Time Delta 32.815Time 2023-05-30T04:31:28




MarioNet saved to ./checkpoints_pytorch/ mario_net_4060000.chkpt at step 4060000
Episode 40000 - Step 4062581 - Epsilon 0.36216861906030073 - Mean Reward 714.21 - Mean Length 147.1 - Mean Loss 1.759 - Mean Q Value 44.902 - Time Delta 33.92Time 2023-05-30T04:32:02




Episode 40000 - Step 4067088 - Epsilon 0.3617607753298483 - Mean Reward 717.1 - Mean Length 160.15 - Mean Loss 1.766 - Mean Q Value 45.204 - Time Delta 47.808Time 2023-05-30T04:32:50




MarioNet saved to ./checkpoints_pytorch/ mario_net_4070000.chkpt at step 4070000
Episode 40000 - Step 4071541 - Epsilon 0.3613582691828962 - Mean Reward 748.96 - Mean Length 179.97 - Mean Loss 1.772 - Mean Q Value 45.156 - Time Delta 46.56Time 2023-05-30T04:33:36




Episode 40000 - Step 4076054 - Epsilon 0.3609507965734104 - Mean Reward 784.44 - Mean Length 197.7 - Mean Loss 1.805 - Mean Q Value 44.841 - Time Delta 48.344Time 2023-05-30T04:34:24




Episode 40000 - Step 4079712 - Epsilon 0.360620857916101 - Mean Reward 767.22 - Mean Length 203.02 - Mean Loss 1.829 - Mean Q Value 44.268 - Time Delta 39.18Time 2023-05-30T04:35:04




MarioNet saved to ./checkpoints_pytorch/ mario_net_4080000.chkpt at step 4080000
Episode 40000 - Step 4083635 - Epsilon 0.36026735234409346 - Mean Reward 786.94 - Mean Length 210.54 - Mean Loss 1.823 - Mean Q Value 43.247 - Time Delta 41.501Time 2023-05-30T04:35:45




Episode 40000 - Step 4087017 - Epsilon 0.3599628749955223 - Mean Reward 787.12 - Mean Length 199.29 - Mean Loss 1.808 - Mean Q Value 42.143 - Time Delta 36.425Time 2023-05-30T04:36:22




MarioNet saved to ./checkpoints_pytorch/ mario_net_4090000.chkpt at step 4090000
Episode 40000 - Step 4090580 - Epsilon 0.3596423807858126 - Mean Reward 792.26 - Mean Length 190.39 - Mean Loss 1.793 - Mean Q Value 41.43 - Time Delta 38.332Time 2023-05-30T04:37:00




Episode 40000 - Step 4094466 - Epsilon 0.359293157831694 - Mean Reward 790.18 - Mean Length 184.12 - Mean Loss 1.773 - Mean Q Value 40.81 - Time Delta 41.144Time 2023-05-30T04:37:41




Episode 40000 - Step 4096852 - Epsilon 0.35907890334398795 - Mean Reward 783.61 - Mean Length 171.4 - Mean Loss 1.752 - Mean Q Value 40.514 - Time Delta 25.663Time 2023-05-30T04:38:07




Episode 40000 - Step 4099594 - Epsilon 0.358832839073117 - Mean Reward 745.94 - Mean Length 159.59 - Mean Loss 1.738 - Mean Q Value 40.348 - Time Delta 29.259Time 2023-05-30T04:38:36




MarioNet saved to ./checkpoints_pytorch/ mario_net_4100000.chkpt at step 4100000
Episode 40000 - Step 4102274 - Epsilon 0.3585925015629206 - Mean Reward 707.77 - Mean Length 152.57 - Mean Loss 1.756 - Mean Q Value 40.295 - Time Delta 28.636Time 2023-05-30T04:39:05




Episode 40000 - Step 4105477 - Epsilon 0.358305473515567 - Mean Reward 693.84 - Mean Length 148.97 - Mean Loss 1.715 - Mean Q Value 40.023 - Time Delta 33.683Time 2023-05-30T04:39:38




Episode 40000 - Step 4109162 - Epsilon 0.357975536557349 - Mean Reward 667.48 - Mean Length 146.96 - Mean Loss 1.677 - Mean Q Value 40.299 - Time Delta 39.479Time 2023-05-30T04:40:18




MarioNet saved to ./checkpoints_pytorch/ mario_net_4110000.chkpt at step 4110000
Episode 40000 - Step 4112267 - Epsilon 0.3576977658360459 - Mean Reward 684.97 - Mean Length 154.15 - Mean Loss 1.678 - Mean Q Value 40.543 - Time Delta 33.371Time 2023-05-30T04:40:51




Episode 40000 - Step 4116613 - Epsilon 0.3573093382163724 - Mean Reward 717.67 - Mean Length 170.19 - Mean Loss 1.64 - Mean Q Value 40.981 - Time Delta 45.828Time 2023-05-30T04:41:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_4120000.chkpt at step 4120000
Episode 40000 - Step 4120215 - Epsilon 0.35698772594472156 - Mean Reward 732.97 - Mean Length 179.41 - Mean Loss 1.603 - Mean Q Value 41.39 - Time Delta 38.676Time 2023-05-30T04:42:16




Episode 40000 - Step 4123667 - Epsilon 0.3566797783971736 - Mean Reward 746.1 - Mean Length 181.9 - Mean Loss 1.63 - Mean Q Value 42.032 - Time Delta 36.851Time 2023-05-30T04:42:53




Episode 40000 - Step 4128750 - Epsilon 0.35622681537442585 - Mean Reward 744.13 - Mean Length 195.88 - Mean Loss 1.631 - Mean Q Value 42.164 - Time Delta 53.835Time 2023-05-30T04:43:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_4130000.chkpt at step 4130000
Episode 40000 - Step 4132427 - Epsilon 0.3558995042968334 - Mean Reward 782.02 - Mean Length 201.6 - Mean Loss 1.627 - Mean Q Value 42.07 - Time Delta 39.371Time 2023-05-30T04:44:26




Episode 40000 - Step 4136795 - Epsilon 0.35551107411101396 - Mean Reward 795.4 - Mean Length 201.82 - Mean Loss 1.652 - Mean Q Value 41.744 - Time Delta 46.53Time 2023-05-30T04:45:12




MarioNet saved to ./checkpoints_pytorch/ mario_net_4140000.chkpt at step 4140000
Episode 40000 - Step 4140206 - Epsilon 0.3552080412286534 - Mean Reward 821.9 - Mean Length 199.91 - Mean Loss 1.679 - Mean Q Value 41.32 - Time Delta 35.91Time 2023-05-30T04:45:48




Episode 40000 - Step 4143846 - Epsilon 0.3548849489000091 - Mean Reward 813.53 - Mean Length 201.79 - Mean Loss 1.661 - Mean Q Value 40.825 - Time Delta 38.93Time 2023-05-30T04:46:27




Episode 40000 - Step 4147237 - Epsilon 0.3545842226353019 - Mean Reward 822.02 - Mean Length 184.87 - Mean Loss 1.652 - Mean Q Value 40.545 - Time Delta 36.387Time 2023-05-30T04:47:03




MarioNet saved to ./checkpoints_pytorch/ mario_net_4150000.chkpt at step 4150000
Episode 40000 - Step 4151878 - Epsilon 0.3541730548143948 - Mean Reward 803.24 - Mean Length 194.51 - Mean Loss 1.639 - Mean Q Value 40.103 - Time Delta 49.298Time 2023-05-30T04:47:53




Episode 40000 - Step 4156858 - Epsilon 0.3537323836807064 - Mean Reward 794.08 - Mean Length 200.63 - Mean Loss 1.637 - Mean Q Value 39.762 - Time Delta 53.687Time 2023-05-30T04:48:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_4160000.chkpt at step 4160000
Episode 40000 - Step 4162946 - Epsilon 0.3531944124253973 - Mean Reward 785.86 - Mean Length 227.4 - Mean Loss 1.61 - Mean Q Value 38.968 - Time Delta 64.948Time 2023-05-30T04:49:51




Episode 40000 - Step 4168767 - Epsilon 0.3526808000003231 - Mean Reward 785.84 - Mean Length 249.21 - Mean Loss 1.576 - Mean Q Value 37.965 - Time Delta 62.061Time 2023-05-30T04:50:53




MarioNet saved to ./checkpoints_pytorch/ mario_net_4170000.chkpt at step 4170000
Episode 40000 - Step 4174302 - Epsilon 0.35219311537690856 - Mean Reward 786.82 - Mean Length 270.65 - Mean Loss 1.55 - Mean Q Value 36.697 - Time Delta 59.356Time 2023-05-30T04:51:53




Episode 40000 - Step 4177335 - Epsilon 0.35192616613368943 - Mean Reward 735.62 - Mean Length 254.57 - Mean Loss 1.511 - Mean Q Value 35.546 - Time Delta 32.386Time 2023-05-30T04:52:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_4180000.chkpt at step 4180000
Episode 40000 - Step 4181260 - Epsilon 0.3515810079109266 - Mean Reward 722.99 - Mean Length 244.02 - Mean Loss 1.442 - Mean Q Value 34.353 - Time Delta 42.156Time 2023-05-30T04:53:07




Episode 40000 - Step 4183951 - Epsilon 0.35134456130191116 - Mean Reward 709.67 - Mean Length 210.05 - Mean Loss 1.41 - Mean Q Value 33.587 - Time Delta 28.403Time 2023-05-30T04:53:36




Episode 40000 - Step 4187081 - Epsilon 0.3510697416854997 - Mean Reward 699.05 - Mean Length 183.14 - Mean Loss 1.398 - Mean Q Value 33.148 - Time Delta 33.619Time 2023-05-30T04:54:09




MarioNet saved to ./checkpoints_pytorch/ mario_net_4190000.chkpt at step 4190000
Episode 40000 - Step 4190149 - Epsilon 0.3508005743987924 - Mean Reward 680.54 - Mean Length 158.47 - Mean Loss 1.365 - Mean Q Value 32.983 - Time Delta 33.007Time 2023-05-30T04:54:42




Episode 40000 - Step 4193831 - Epsilon 0.35047781100431075 - Mean Reward 730.58 - Mean Length 164.96 - Mean Loss 1.37 - Mean Q Value 32.992 - Time Delta 39.168Time 2023-05-30T04:55:22




Episode 40000 - Step 4196881 - Epsilon 0.3502106734991064 - Mean Reward 734.25 - Mean Length 156.21 - Mean Loss 1.366 - Mean Q Value 33.299 - Time Delta 32.787Time 2023-05-30T04:55:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_4200000.chkpt at step 4200000
Episode 40000 - Step 4200422 - Epsilon 0.34990078664551455 - Mean Reward 776.29 - Mean Length 164.71 - Mean Loss 1.339 - Mean Q Value 34.304 - Time Delta 37.877Time 2023-05-30T04:56:32




Episode 40000 - Step 4203892 - Epsilon 0.34959737929709667 - Mean Reward 809.28 - Mean Length 168.11 - Mean Loss 1.373 - Mean Q Value 35.537 - Time Delta 36.987Time 2023-05-30T04:57:09




Episode 40000 - Step 4207882 - Epsilon 0.3492488297356247 - Mean Reward 864.22 - Mean Length 177.33 - Mean Loss 1.42 - Mean Q Value 37.167 - Time Delta 42.211Time 2023-05-30T04:57:51




MarioNet saved to ./checkpoints_pytorch/ mario_net_4210000.chkpt at step 4210000
Episode 40000 - Step 4211060 - Epsilon 0.3489714617047317 - Mean Reward 833.61 - Mean Length 172.29 - Mean Loss 1.456 - Mean Q Value 38.936 - Time Delta 34.096Time 2023-05-30T04:58:26




Episode 40000 - Step 4214525 - Epsilon 0.34866929603261915 - Mean Reward 841.97 - Mean Length 176.44 - Mean Loss 1.518 - Mean Q Value 40.734 - Time Delta 37.745Time 2023-05-30T04:59:03




Episode 40000 - Step 4218242 - Epsilon 0.34834544554109237 - Mean Reward 814.85 - Mean Length 178.2 - Mean Loss 1.56 - Mean Q Value 42.147 - Time Delta 39.648Time 2023-05-30T04:59:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_4220000.chkpt at step 4220000
Episode 40000 - Step 4221065 - Epsilon 0.3480996874442338 - Mean Reward 781.73 - Mean Length 171.73 - Mean Loss 1.561 - Mean Q Value 43.185 - Time Delta 30.339Time 2023-05-30T05:00:13




Episode 40000 - Step 4225584 - Epsilon 0.3477066438354719 - Mean Reward 762.21 - Mean Length 177.02 - Mean Loss 1.562 - Mean Q Value 43.774 - Time Delta 48.321Time 2023-05-30T05:01:02




MarioNet saved to ./checkpoints_pytorch/ mario_net_4230000.chkpt at step 4230000
Episode 40000 - Step 4230605 - Epsilon 0.34727045883409785 - Mean Reward 781.02 - Mean Length 195.45 - Mean Loss 1.563 - Mean Q Value 43.93 - Time Delta 52.892Time 2023-05-30T05:01:54




Episode 40000 - Step 4233318 - Epsilon 0.3470350024740931 - Mean Reward 750.96 - Mean Length 187.93 - Mean Loss 1.566 - Mean Q Value 43.726 - Time Delta 29.129Time 2023-05-30T05:02:24




Episode 40000 - Step 4238827 - Epsilon 0.34655737743720477 - Mean Reward 796.7 - Mean Length 205.85 - Mean Loss 1.604 - Mean Q Value 43.221 - Time Delta 58.158Time 2023-05-30T05:03:22




MarioNet saved to ./checkpoints_pytorch/ mario_net_4240000.chkpt at step 4240000
Episode 40000 - Step 4242612 - Epsilon 0.3462296025807246 - Mean Reward 827.1 - Mean Length 215.47 - Mean Loss 1.623 - Mean Q Value 42.54 - Time Delta 40.192Time 2023-05-30T05:04:02




Episode 40000 - Step 4246211 - Epsilon 0.3459182225599284 - Mean Reward 826.76 - Mean Length 206.27 - Mean Loss 1.631 - Mean Q Value 41.841 - Time Delta 38.384Time 2023-05-30T05:04:40




Episode 40000 - Step 4249539 - Epsilon 0.3456305382560236 - Mean Reward 827.08 - Mean Length 189.34 - Mean Loss 1.612 - Mean Q Value 41.253 - Time Delta 35.613Time 2023-05-30T05:05:16




MarioNet saved to ./checkpoints_pytorch/ mario_net_4250000.chkpt at step 4250000
Episode 40000 - Step 4251981 - Epsilon 0.3454195951829452 - Mean Reward 818.56 - Mean Length 186.63 - Mean Loss 1.63 - Mean Q Value 40.915 - Time Delta 25.904Time 2023-05-30T05:05:42




Episode 40000 - Step 4255430 - Epsilon 0.3451218854682893 - Mean Reward 771.02 - Mean Length 166.03 - Mean Loss 1.627 - Mean Q Value 40.657 - Time Delta 37.355Time 2023-05-30T05:06:19




Episode 40000 - Step 4259300 - Epsilon 0.34478814147702436 - Mean Reward 761.55 - Mean Length 166.88 - Mean Loss 1.605 - Mean Q Value 40.573 - Time Delta 41.51Time 2023-05-30T05:07:01




MarioNet saved to ./checkpoints_pytorch/ mario_net_4260000.chkpt at step 4260000
Episode 40000 - Step 4262470 - Epsilon 0.34451500508531363 - Mean Reward 741.95 - Mean Length 162.59 - Mean Loss 1.632 - Mean Q Value 40.582 - Time Delta 33.468Time 2023-05-30T05:07:34




Episode 40000 - Step 4265643 - Epsilon 0.34424182688697225 - Mean Reward 734.19 - Mean Length 161.04 - Mean Loss 1.666 - Mean Q Value 40.7 - Time Delta 34.282Time 2023-05-30T05:08:08




Episode 40000 - Step 4268459 - Epsilon 0.34399956589640257 - Mean Reward 743.98 - Mean Length 164.78 - Mean Loss 1.646 - Mean Q Value 40.907 - Time Delta 30.101Time 2023-05-30T05:08:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_4270000.chkpt at step 4270000
Episode 40000 - Step 4271390 - Epsilon 0.3437475925109832 - Mean Reward 739.62 - Mean Length 159.6 - Mean Loss 1.639 - Mean Q Value 41.386 - Time Delta 31.199Time 2023-05-30T05:09:10




Episode 40000 - Step 4274917 - Epsilon 0.343444626623089 - Mean Reward 727.78 - Mean Length 156.17 - Mean Loss 1.644 - Mean Q Value 41.898 - Time Delta 38.09Time 2023-05-30T05:09:48




Episode 40000 - Step 4278399 - Epsilon 0.34314578812654395 - Mean Reward 697.17 - Mean Length 159.29 - Mean Loss 1.619 - Mean Q Value 42.405 - Time Delta 37.318Time 2023-05-30T05:10:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_4280000.chkpt at step 4280000
Episode 40000 - Step 4282843 - Epsilon 0.34276476480590334 - Mean Reward 723.01 - Mean Length 172.0 - Mean Loss 1.613 - Mean Q Value 42.835 - Time Delta 48.202Time 2023-05-30T05:11:13




Episode 40000 - Step 4288228 - Epsilon 0.342303628155831 - Mean Reward 734.98 - Mean Length 197.69 - Mean Loss 1.58 - Mean Q Value 42.601 - Time Delta 57.617Time 2023-05-30T05:12:11




MarioNet saved to ./checkpoints_pytorch/ mario_net_4290000.chkpt at step 4290000
Episode 40000 - Step 4291367 - Epsilon 0.3420351107233414 - Mean Reward 739.78 - Mean Length 199.77 - Mean Loss 1.547 - Mean Q Value 41.998 - Time Delta 33.749Time 2023-05-30T05:12:45




Episode 40000 - Step 4296980 - Epsilon 0.34155548648925654 - Mean Reward 748.36 - Mean Length 220.63 - Mean Loss 1.514 - Mean Q Value 41.149 - Time Delta 59.307Time 2023-05-30T05:13:44




MarioNet saved to ./checkpoints_pytorch/ mario_net_4300000.chkpt at step 4300000
Episode 40000 - Step 4303323 - Epsilon 0.3410142940202145 - Mean Reward 770.93 - Mean Length 249.24 - Mean Loss 1.46 - Mean Q Value 39.78 - Time Delta 67.991Time 2023-05-30T05:14:52




Episode 40000 - Step 4308196 - Epsilon 0.3405991062571641 - Mean Reward 771.41 - Mean Length 253.53 - Mean Loss 1.391 - Mean Q Value 37.936 - Time Delta 51.222Time 2023-05-30T05:15:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_4310000.chkpt at step 4310000
Episode 40000 - Step 4310874 - Epsilon 0.3403711514433868 - Mean Reward 760.39 - Mean Length 226.46 - Mean Loss 1.366 - Mean Q Value 36.385 - Time Delta 28.798Time 2023-05-30T05:16:12




Episode 40000 - Step 4315017 - Epsilon 0.3400187944875453 - Mean Reward 769.41 - Mean Length 236.5 - Mean Loss 1.344 - Mean Q Value 34.932 - Time Delta 44.621Time 2023-05-30T05:16:57




Episode 40000 - Step 4318043 - Epsilon 0.33976166750788106 - Mean Reward 751.78 - Mean Length 210.63 - Mean Loss 1.357 - Mean Q Value 33.789 - Time Delta 32.242Time 2023-05-30T05:17:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_4320000.chkpt at step 4320000
Episode 40000 - Step 4322281 - Epsilon 0.3394018806068996 - Mean Reward 764.68 - Mean Length 189.58 - Mean Loss 1.409 - Mean Q Value 33.271 - Time Delta 45.689Time 2023-05-30T05:18:15




Episode 40000 - Step 4325205 - Epsilon 0.3391538684606247 - Mean Reward 739.71 - Mean Length 170.09 - Mean Loss 1.456 - Mean Q Value 33.33 - Time Delta 31.458Time 2023-05-30T05:18:46




Episode 40000 - Step 4328004 - Epsilon 0.33891662852548365 - Mean Reward 736.46 - Mean Length 171.3 - Mean Loss 1.501 - Mean Q Value 33.989 - Time Delta 29.645Time 2023-05-30T05:19:16




MarioNet saved to ./checkpoints_pytorch/ mario_net_4330000.chkpt at step 4330000
Episode 40000 - Step 4331946 - Epsilon 0.3385827906719165 - Mean Reward 742.82 - Mean Length 169.29 - Mean Loss 1.57 - Mean Q Value 34.605 - Time Delta 41.781Time 2023-05-30T05:19:58




Episode 40000 - Step 4334478 - Epsilon 0.33836853555750723 - Mean Reward 731.67 - Mean Length 164.35 - Mean Loss 1.618 - Mean Q Value 35.234 - Time Delta 27.19Time 2023-05-30T05:20:25




Episode 40000 - Step 4337566 - Epsilon 0.33810741582056814 - Mean Reward 736.78 - Mean Length 152.85 - Mean Loss 1.655 - Mean Q Value 36.202 - Time Delta 33.102Time 2023-05-30T05:20:58




MarioNet saved to ./checkpoints_pytorch/ mario_net_4340000.chkpt at step 4340000
Episode 40000 - Step 4340152 - Epsilon 0.33788889999174876 - Mean Reward 717.81 - Mean Length 149.47 - Mean Loss 1.688 - Mean Q Value 37.517 - Time Delta 27.313Time 2023-05-30T05:21:25




Episode 40000 - Step 4344805 - Epsilon 0.33749607919836566 - Mean Reward 734.96 - Mean Length 168.01 - Mean Loss 1.738 - Mean Q Value 38.541 - Time Delta 49.651Time 2023-05-30T05:22:15




Episode 40000 - Step 4347778 - Epsilon 0.33724532840293026 - Mean Reward 711.88 - Mean Length 158.32 - Mean Loss 1.756 - Mean Q Value 39.875 - Time Delta 31.998Time 2023-05-30T05:22:47




MarioNet saved to ./checkpoints_pytorch/ mario_net_4350000.chkpt at step 4350000
Episode 40000 - Step 4350762 - Epsilon 0.3369938371743617 - Mean Reward 732.75 - Mean Length 162.84 - Mean Loss 1.754 - Mean Q Value 41.181 - Time Delta 32.245Time 2023-05-30T05:23:19




Episode 40000 - Step 4353649 - Epsilon 0.3367506995947829 - Mean Reward 719.39 - Mean Length 160.83 - Mean Loss 1.751 - Mean Q Value 42.087 - Time Delta 31.252Time 2023-05-30T05:23:50




Episode 40000 - Step 4357219 - Epsilon 0.3364502836383626 - Mean Reward 787.17 - Mean Length 170.67 - Mean Loss 1.756 - Mean Q Value 42.737 - Time Delta 38.454Time 2023-05-30T05:24:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_4360000.chkpt at step 4360000
Episode 40000 - Step 4360443 - Epsilon 0.33617921393159533 - Mean Reward 792.54 - Mean Length 156.38 - Mean Loss 1.727 - Mean Q Value 43.327 - Time Delta 34.662Time 2023-05-30T05:25:03




Episode 40000 - Step 4365357 - Epsilon 0.33576647129470394 - Mean Reward 793.54 - Mean Length 175.79 - Mean Loss 1.672 - Mean Q Value 43.642 - Time Delta 52.44Time 2023-05-30T05:25:56




Episode 40000 - Step 4369178 - Epsilon 0.33544588347801557 - Mean Reward 802.62 - Mean Length 184.16 - Mean Loss 1.637 - Mean Q Value 43.645 - Time Delta 38.393Time 2023-05-30T05:26:34




MarioNet saved to ./checkpoints_pytorch/ mario_net_4370000.chkpt at step 4370000
Episode 40000 - Step 4373581 - Epsilon 0.33507684452267283 - Mean Reward 818.53 - Mean Length 199.32 - Mean Loss 1.611 - Mean Q Value 43.426 - Time Delta 46.739Time 2023-05-30T05:27:21




Episode 40000 - Step 4376702 - Epsilon 0.33481550275124616 - Mean Reward 770.7 - Mean Length 194.83 - Mean Loss 1.57 - Mean Q Value 42.871 - Time Delta 33.104Time 2023-05-30T05:27:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_4380000.chkpt at step 4380000
Episode 40000 - Step 4380063 - Epsilon 0.33453429215001784 - Mean Reward 789.12 - Mean Length 196.2 - Mean Loss 1.54 - Mean Q Value 42.322 - Time Delta 36.351Time 2023-05-30T05:28:30




Episode 40000 - Step 4383843 - Epsilon 0.334218306531114 - Mean Reward 810.1 - Mean Length 184.86 - Mean Loss 1.584 - Mean Q Value 41.8 - Time Delta 40.652Time 2023-05-30T05:29:11




Episode 40000 - Step 4387883 - Epsilon 0.3338809164099888 - Mean Reward 816.13 - Mean Length 187.05 - Mean Loss 1.589 - Mean Q Value 41.746 - Time Delta 43.539Time 2023-05-30T05:29:55




MarioNet saved to ./checkpoints_pytorch/ mario_net_4390000.chkpt at step 4390000
Episode 40000 - Step 4392589 - Epsilon 0.3334883364431527 - Mean Reward 832.28 - Mean Length 190.08 - Mean Loss 1.623 - Mean Q Value 41.61 - Time Delta 50.705Time 2023-05-30T05:30:45




Episode 40000 - Step 4396352 - Epsilon 0.33317475477550706 - Mean Reward 848.19 - Mean Length 196.5 - Mean Loss 1.644 - Mean Q Value 41.402 - Time Delta 40.463Time 2023-05-30T05:31:26




MarioNet saved to ./checkpoints_pytorch/ mario_net_4400000.chkpt at step 4400000
Episode 40000 - Step 4400253 - Epsilon 0.3328499844471438 - Mean Reward 828.83 - Mean Length 201.9 - Mean Loss 1.671 - Mean Q Value 41.016 - Time Delta 41.842Time 2023-05-30T05:32:08




Episode 40000 - Step 4403439 - Epsilon 0.33258497495540945 - Mean Reward 827.55 - Mean Length 195.96 - Mean Loss 1.674 - Mean Q Value 40.734 - Time Delta 33.828Time 2023-05-30T05:32:41




Episode 40000 - Step 4406186 - Epsilon 0.3323566506051341 - Mean Reward 818.84 - Mean Length 183.03 - Mean Loss 1.704 - Mean Q Value 40.543 - Time Delta 29.556Time 2023-05-30T05:33:11




Episode 40000 - Step 4409396 - Epsilon 0.3320900413509321 - Mean Reward 789.36 - Mean Length 168.07 - Mean Loss 1.688 - Mean Q Value 40.716 - Time Delta 33.912Time 2023-05-30T05:33:45




MarioNet saved to ./checkpoints_pytorch/ mario_net_4410000.chkpt at step 4410000
Episode 40000 - Step 4414413 - Epsilon 0.3316737784679065 - Mean Reward 801.42 - Mean Length 180.61 - Mean Loss 1.726 - Mean Q Value 41.301 - Time Delta 53.736Time 2023-05-30T05:34:39




Episode 40000 - Step 4419430 - Epsilon 0.331258037355383 - Mean Reward 827.8 - Mean Length 191.77 - Mean Loss 1.755 - Mean Q Value 41.752 - Time Delta 53.502Time 2023-05-30T05:35:32




MarioNet saved to ./checkpoints_pytorch/ mario_net_4420000.chkpt at step 4420000
Episode 40000 - Step 4426373 - Epsilon 0.33068355486837503 - Mean Reward 812.58 - Mean Length 229.34 - Mean Loss 1.811 - Mean Q Value 41.764 - Time Delta 74.299Time 2023-05-30T05:36:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_4430000.chkpt at step 4430000
Episode 40000 - Step 4431587 - Epsilon 0.3302527896129477 - Mean Reward 816.75 - Mean Length 254.01 - Mean Loss 1.839 - Mean Q Value 40.897 - Time Delta 55.305Time 2023-05-30T05:37:42




Episode 40000 - Step 4434347 - Epsilon 0.3300249937582164 - Mean Reward 799.76 - Mean Length 249.51 - Mean Loss 1.865 - Mean Q Value 39.722 - Time Delta 29.537Time 2023-05-30T05:38:11




Episode 40000 - Step 4438340 - Epsilon 0.32969571064776787 - Mean Reward 791.37 - Mean Length 239.27 - Mean Loss 1.838 - Mean Q Value 38.524 - Time Delta 42.69Time 2023-05-30T05:38:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_4440000.chkpt at step 4440000
Episode 40000 - Step 4442865 - Epsilon 0.32932295320912747 - Mean Reward 789.37 - Mean Length 234.35 - Mean Loss 1.819 - Mean Q Value 37.58 - Time Delta 47.841Time 2023-05-30T05:39:42




Episode 40000 - Step 4447560 - Epsilon 0.32893663710806803 - Mean Reward 816.85 - Mean Length 211.87 - Mean Loss 1.699 - Mean Q Value 36.737 - Time Delta 50.594Time 2023-05-30T05:40:32




MarioNet saved to ./checkpoints_pytorch/ mario_net_4450000.chkpt at step 4450000
Episode 40000 - Step 4450975 - Epsilon 0.32865592726407433 - Mean Reward 776.02 - Mean Length 193.88 - Mean Loss 1.631 - Mean Q Value 36.266 - Time Delta 36.455Time 2023-05-30T05:41:09




Episode 40000 - Step 4453252 - Epsilon 0.328468893093723 - Mean Reward 749.35 - Mean Length 189.05 - Mean Loss 1.541 - Mean Q Value 35.958 - Time Delta 24.064Time 2023-05-30T05:41:33




Episode 40000 - Step 4457218 - Epsilon 0.3281433775464855 - Mean Reward 758.17 - Mean Length 188.78 - Mean Loss 1.46 - Mean Q Value 35.697 - Time Delta 42.667Time 2023-05-30T05:42:16




MarioNet saved to ./checkpoints_pytorch/ mario_net_4460000.chkpt at step 4460000
Episode 40000 - Step 4460244 - Epsilon 0.3278952309236134 - Mean Reward 705.69 - Mean Length 173.79 - Mean Loss 1.373 - Mean Q Value 35.595 - Time Delta 32.721Time 2023-05-30T05:42:48




Episode 40000 - Step 4462782 - Epsilon 0.3276872473633274 - Mean Reward 663.56 - Mean Length 152.22 - Mean Loss 1.385 - Mean Q Value 35.74 - Time Delta 27.233Time 2023-05-30T05:43:16




Episode 40000 - Step 4466722 - Epsilon 0.32736463429732204 - Mean Reward 727.79 - Mean Length 157.47 - Mean Loss 1.352 - Mean Q Value 36.635 - Time Delta 41.963Time 2023-05-30T05:43:58




MarioNet saved to ./checkpoints_pytorch/ mario_net_4470000.chkpt at step 4470000
Episode 40000 - Step 4470967 - Epsilon 0.3270174028180147 - Mean Reward 764.9 - Mean Length 177.15 - Mean Loss 1.368 - Mean Q Value 37.802 - Time Delta 45.418Time 2023-05-30T05:44:43




Episode 40000 - Step 4475191 - Epsilon 0.32667225466774413 - Mean Reward 758.57 - Mean Length 179.73 - Mean Loss 1.387 - Mean Q Value 38.766 - Time Delta 44.634Time 2023-05-30T05:45:28




Episode 40000 - Step 4477458 - Epsilon 0.3264871655988184 - Mean Reward 757.85 - Mean Length 172.14 - Mean Loss 1.4 - Mean Q Value 39.568 - Time Delta 24.419Time 2023-05-30T05:45:52




MarioNet saved to ./checkpoints_pytorch/ mario_net_4480000.chkpt at step 4480000
Episode 40000 - Step 4480843 - Epsilon 0.32621099267252035 - Mean Reward 771.67 - Mean Length 180.61 - Mean Loss 1.379 - Mean Q Value 40.588 - Time Delta 36.37Time 2023-05-30T05:46:28




Episode 40000 - Step 4484133 - Epsilon 0.32594279440914153 - Mean Reward 749.19 - Mean Length 174.11 - Mean Loss 1.381 - Mean Q Value 41.028 - Time Delta 35.332Time 2023-05-30T05:47:04




MarioNet saved to ./checkpoints_pytorch/ mario_net_4490000.chkpt at step 4490000
Episode 40000 - Step 4490309 - Epsilon 0.32543992698519586 - Mean Reward 752.26 - Mean Length 193.42 - Mean Loss 1.363 - Mean Q Value 40.965 - Time Delta 66.25Time 2023-05-30T05:48:10




Episode 40000 - Step 4495207 - Epsilon 0.3250416696275914 - Mean Reward 748.26 - Mean Length 200.16 - Mean Loss 1.372 - Mean Q Value 40.839 - Time Delta 52.197Time 2023-05-30T05:49:02




Episode 40000 - Step 4498490 - Epsilon 0.3247750010929436 - Mean Reward 738.53 - Mean Length 210.32 - Mean Loss 1.385 - Mean Q Value 40.674 - Time Delta 35.107Time 2023-05-30T05:49:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_4500000.chkpt at step 4500000
Episode 40000 - Step 4502514 - Epsilon 0.32444844168789955 - Mean Reward 772.43 - Mean Length 216.71 - Mean Loss 1.396 - Mean Q Value 40.217 - Time Delta 43.334Time 2023-05-30T05:50:21




Episode 40000 - Step 4506858 - Epsilon 0.32409628189353706 - Mean Reward 796.83 - Mean Length 227.25 - Mean Loss 1.418 - Mean Q Value 39.713 - Time Delta 46.338Time 2023-05-30T05:51:07




Episode 40000 - Step 4509932 - Epsilon 0.3238473095496222 - Mean Reward 798.71 - Mean Length 196.23 - Mean Loss 1.437 - Mean Q Value 39.466 - Time Delta 32.45Time 2023-05-30T05:51:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_4510000.chkpt at step 4510000
Episode 40000 - Step 4513636 - Epsilon 0.3235475657062874 - Mean Reward 806.54 - Mean Length 184.29 - Mean Loss 1.472 - Mean Q Value 39.658 - Time Delta 39.679Time 2023-05-30T05:52:19




Episode 40000 - Step 4517103 - Epsilon 0.323267252316999 - Mean Reward 849.45 - Mean Length 186.13 - Mean Loss 1.52 - Mean Q Value 39.95 - Time Delta 37.262Time 2023-05-30T05:52:56




MarioNet saved to ./checkpoints_pytorch/ mario_net_4520000.chkpt at step 4520000
Episode 40000 - Step 4521811 - Epsilon 0.32288699054139 - Mean Reward 845.16 - Mean Length 192.97 - Mean Loss 1.583 - Mean Q Value 40.186 - Time Delta 50.908Time 2023-05-30T05:53:47




Episode 40000 - Step 4525072 - Epsilon 0.32262386416088856 - Mean Reward 816.2 - Mean Length 182.14 - Mean Loss 1.649 - Mean Q Value 40.502 - Time Delta 35.163Time 2023-05-30T05:54:22




Episode 40000 - Step 4529022 - Epsilon 0.3223054303077702 - Mean Reward 815.3 - Mean Length 190.9 - Mean Loss 1.737 - Mean Q Value 41.09 - Time Delta 42.435Time 2023-05-30T05:55:05




MarioNet saved to ./checkpoints_pytorch/ mario_net_4530000.chkpt at step 4530000
Episode 40000 - Step 4532590 - Epsilon 0.3220180620132877 - Mean Reward 780.65 - Mean Length 189.54 - Mean Loss 1.851 - Mean Q Value 41.495 - Time Delta 38.181Time 2023-05-30T05:55:43




Episode 40000 - Step 4537077 - Epsilon 0.3216570407327145 - Mean Reward 790.52 - Mean Length 199.74 - Mean Loss 1.962 - Mean Q Value 41.763 - Time Delta 48.107Time 2023-05-30T05:56:31




MarioNet saved to ./checkpoints_pytorch/ mario_net_4540000.chkpt at step 4540000
Episode 40000 - Step 4543601 - Epsilon 0.321132845630988 - Mean Reward 780.14 - Mean Length 217.9 - Mean Loss 2.021 - Mean Q Value 41.662 - Time Delta 68.829Time 2023-05-30T05:57:40




Episode 40000 - Step 4547106 - Epsilon 0.3208515761889777 - Mean Reward 773.63 - Mean Length 220.34 - Mean Loss 2.037 - Mean Q Value 41.36 - Time Delta 37.6Time 2023-05-30T05:58:18




MarioNet saved to ./checkpoints_pytorch/ mario_net_4550000.chkpt at step 4550000
Episode 40000 - Step 4551384 - Epsilon 0.32050860881981724 - Mean Reward 777.48 - Mean Length 223.62 - Mean Loss 2.031 - Mean Q Value 40.726 - Time Delta 46.127Time 2023-05-30T05:59:04




Episode 40000 - Step 4557298 - Epsilon 0.3200350869197435 - Mean Reward 812.88 - Mean Length 247.08 - Mean Loss 1.948 - Mean Q Value 39.528 - Time Delta 63.195Time 2023-05-30T06:00:07




MarioNet saved to ./checkpoints_pytorch/ mario_net_4560000.chkpt at step 4560000
Episode 40000 - Step 4561730 - Epsilon 0.3196806843743529 - Mean Reward 815.04 - Mean Length 246.53 - Mean Loss 1.865 - Mean Q Value 38.106 - Time Delta 47.469Time 2023-05-30T06:00:54




Episode 40000 - Step 4565803 - Epsilon 0.3193553351483097 - Mean Reward 828.85 - Mean Length 222.02 - Mean Loss 1.792 - Mean Q Value 37.066 - Time Delta 43.271Time 2023-05-30T06:01:38




Episode 40000 - Step 4569182 - Epsilon 0.3190856736095255 - Mean Reward 828.76 - Mean Length 220.76 - Mean Loss 1.73 - Mean Q Value 36.24 - Time Delta 36.368Time 2023-05-30T06:02:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_4570000.chkpt at step 4570000
Episode 40000 - Step 4572929 - Epsilon 0.3187869100226249 - Mean Reward 839.47 - Mean Length 215.45 - Mean Loss 1.639 - Mean Q Value 35.456 - Time Delta 40.087Time 2023-05-30T06:02:54




Episode 40000 - Step 4577227 - Epsilon 0.31844455740684574 - Mean Reward 846.79 - Mean Length 199.29 - Mean Loss 1.569 - Mean Q Value 35.16 - Time Delta 45.439Time 2023-05-30T06:03:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_4580000.chkpt at step 4580000
Episode 40000 - Step 4583013 - Epsilon 0.317984260287106 - Mean Reward 862.56 - Mean Length 212.83 - Mean Loss 1.513 - Mean Q Value 35.464 - Time Delta 62.419Time 2023-05-30T06:04:42




Episode 40000 - Step 4586999 - Epsilon 0.3176675467608193 - Mean Reward 830.88 - Mean Length 211.96 - Mean Loss 1.466 - Mean Q Value 35.751 - Time Delta 42.467Time 2023-05-30T06:05:24




MarioNet saved to ./checkpoints_pytorch/ mario_net_4590000.chkpt at step 4590000
Episode 40000 - Step 4592209 - Epsilon 0.31725405407498825 - Mean Reward 858.1 - Mean Length 230.27 - Mean Loss 1.447 - Mean Q Value 36.146 - Time Delta 55.776Time 2023-05-30T06:06:20




Episode 40000 - Step 4596349 - Epsilon 0.31692586595460165 - Mean Reward 877.76 - Mean Length 234.2 - Mean Loss 1.475 - Mean Q Value 36.58 - Time Delta 44.302Time 2023-05-30T06:07:04




MarioNet saved to ./checkpoints_pytorch/ mario_net_4600000.chkpt at step 4600000
Episode 40000 - Step 4601960 - Epsilon 0.31648160980354373 - Mean Reward 848.62 - Mean Length 247.33 - Mean Loss 1.508 - Mean Q Value 36.908 - Time Delta 59.275Time 2023-05-30T06:08:04




Episode 40000 - Step 4606675 - Epsilon 0.31610877684092903 - Mean Reward 803.96 - Mean Length 236.62 - Mean Loss 1.53 - Mean Q Value 36.585 - Time Delta 49.705Time 2023-05-30T06:08:53




MarioNet saved to ./checkpoints_pytorch/ mario_net_4610000.chkpt at step 4610000
Episode 40000 - Step 4610255 - Epsilon 0.3158259860180385 - Mean Reward 810.49 - Mean Length 232.56 - Mean Loss 1.56 - Mean Q Value 36.429 - Time Delta 37.681Time 2023-05-30T06:09:31




Episode 40000 - Step 4616046 - Epsilon 0.3153690797124601 - Mean Reward 813.21 - Mean Length 238.37 - Mean Loss 1.57 - Mean Q Value 35.884 - Time Delta 61.811Time 2023-05-30T06:10:33




Episode 40000 - Step 4619019 - Epsilon 0.3151347687012601 - Mean Reward 771.18 - Mean Length 226.7 - Mean Loss 1.54 - Mean Q Value 35.134 - Time Delta 32.069Time 2023-05-30T06:11:05




MarioNet saved to ./checkpoints_pytorch/ mario_net_4620000.chkpt at step 4620000
Episode 40000 - Step 4623641 - Epsilon 0.3147708407303621 - Mean Reward 786.94 - Mean Length 216.81 - Mean Loss 1.554 - Mean Q Value 34.679 - Time Delta 49.783Time 2023-05-30T06:11:55




Episode 40000 - Step 4627397 - Epsilon 0.3144754096005689 - Mean Reward 810.01 - Mean Length 207.22 - Mean Loss 1.549 - Mean Q Value 34.595 - Time Delta 40.17Time 2023-05-30T06:12:35




MarioNet saved to ./checkpoints_pytorch/ mario_net_4630000.chkpt at step 4630000
Episode 40000 - Step 4631423 - Epsilon 0.31415904929624505 - Mean Reward 834.43 - Mean Length 211.68 - Mean Loss 1.565 - Mean Q Value 34.593 - Time Delta 43.209Time 2023-05-30T06:13:18




Episode 40000 - Step 4635688 - Epsilon 0.31382425568659905 - Mean Reward 837.87 - Mean Length 196.42 - Mean Loss 1.602 - Mean Q Value 35.036 - Time Delta 45.771Time 2023-05-30T06:14:04




Episode 40000 - Step 4639203 - Epsilon 0.31354860371974064 - Mean Reward 856.66 - Mean Length 201.84 - Mean Loss 1.664 - Mean Q Value 35.697 - Time Delta 37.536Time 2023-05-30T06:14:41




MarioNet saved to ./checkpoints_pytorch/ mario_net_4640000.chkpt at step 4640000
Episode 40000 - Step 4642356 - Epsilon 0.3133015463862047 - Mean Reward 855.95 - Mean Length 187.15 - Mean Loss 1.674 - Mean Q Value 36.518 - Time Delta 33.411Time 2023-05-30T06:15:15




Episode 40000 - Step 4645945 - Epsilon 0.3130205626137422 - Mean Reward 859.11 - Mean Length 185.48 - Mean Loss 1.704 - Mean Q Value 37.651 - Time Delta 37.958Time 2023-05-30T06:15:53




Episode 40000 - Step 4649365 - Epsilon 0.3127530443795731 - Mean Reward 855.43 - Mean Length 179.42 - Mean Loss 1.698 - Mean Q Value 38.827 - Time Delta 36.715Time 2023-05-30T06:16:30




MarioNet saved to ./checkpoints_pytorch/ mario_net_4650000.chkpt at step 4650000
Episode 40000 - Step 4655820 - Epsilon 0.31224874610620873 - Mean Reward 865.73 - Mean Length 201.32 - Mean Loss 1.711 - Mean Q Value 39.916 - Time Delta 69.306Time 2023-05-30T06:17:39




Episode 40000 - Step 4659430 - Epsilon 0.31196706870364527 - Mean Reward 861.08 - Mean Length 202.27 - Mean Loss 1.713 - Mean Q Value 40.875 - Time Delta 38.786Time 2023-05-30T06:18:18




MarioNet saved to ./checkpoints_pytorch/ mario_net_4660000.chkpt at step 4660000
Episode 40000 - Step 4664380 - Epsilon 0.3115812481835285 - Mean Reward 887.69 - Mean Length 220.24 - Mean Loss 1.715 - Mean Q Value 41.458 - Time Delta 52.532Time 2023-05-30T06:19:10




Episode 40000 - Step 4667873 - Epsilon 0.31130927859052415 - Mean Reward 891.4 - Mean Length 219.28 - Mean Loss 1.71 - Mean Q Value 41.84 - Time Delta 37.205Time 2023-05-30T06:19:47




MarioNet saved to ./checkpoints_pytorch/ mario_net_4670000.chkpt at step 4670000
Episode 40000 - Step 4671838 - Epsilon 0.31100084612162193 - Mean Reward 884.93 - Mean Length 224.73 - Mean Loss 1.715 - Mean Q Value 42.014 - Time Delta 42.681Time 2023-05-30T06:20:30




Episode 40000 - Step 4675526 - Epsilon 0.31071423545345467 - Mean Reward 878.99 - Mean Length 197.06 - Mean Loss 1.692 - Mean Q Value 42.269 - Time Delta 39.282Time 2023-05-30T06:21:09




Episode 40000 - Step 4679033 - Epsilon 0.31044193610011667 - Mean Reward 885.92 - Mean Length 196.03 - Mean Loss 1.669 - Mean Q Value 42.736 - Time Delta 37.162Time 2023-05-30T06:21:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_4680000.chkpt at step 4680000
Episode 40000 - Step 4682715 - Epsilon 0.31015630574373254 - Mean Reward 852.93 - Mean Length 183.35 - Mean Loss 1.662 - Mean Q Value 43.139 - Time Delta 39.44Time 2023-05-30T06:22:26




Episode 40000 - Step 4686623 - Epsilon 0.30985343097353657 - Mean Reward 878.93 - Mean Length 187.5 - Mean Loss 1.657 - Mean Q Value 43.277 - Time Delta 41.894Time 2023-05-30T06:23:08




MarioNet saved to ./checkpoints_pytorch/ mario_net_4690000.chkpt at step 4690000
Episode 40000 - Step 4690148 - Epsilon 0.30958049288434486 - Mean Reward 890.68 - Mean Length 183.1 - Mean Loss 1.619 - Mean Q Value 43.453 - Time Delta 37.662Time 2023-05-30T06:23:45




Episode 40000 - Step 4693095 - Epsilon 0.3093524934272932 - Mean Reward 846.67 - Mean Length 175.69 - Mean Loss 1.648 - Mean Q Value 43.502 - Time Delta 31.84Time 2023-05-30T06:24:17




Episode 40000 - Step 4696605 - Epsilon 0.3090811556472182 - Mean Reward 853.08 - Mean Length 175.72 - Mean Loss 1.656 - Mean Q Value 43.833 - Time Delta 37.794Time 2023-05-30T06:24:55




MarioNet saved to ./checkpoints_pytorch/ mario_net_4700000.chkpt at step 4700000
Episode 40000 - Step 4700264 - Epsilon 0.3087985528998624 - Mean Reward 884.13 - Mean Length 175.49 - Mean Loss 1.659 - Mean Q Value 44.435 - Time Delta 38.981Time 2023-05-30T06:25:34




Episode 40000 - Step 4703642 - Epsilon 0.30853788257285597 - Mean Reward 840.54 - Mean Length 170.19 - Mean Loss 1.691 - Mean Q Value 45.213 - Time Delta 36.17Time 2023-05-30T06:26:10




Episode 40000 - Step 4707679 - Epsilon 0.30822664775920844 - Mean Reward 841.65 - Mean Length 175.31 - Mean Loss 1.744 - Mean Q Value 45.979 - Time Delta 42.897Time 2023-05-30T06:26:53




MarioNet saved to ./checkpoints_pytorch/ mario_net_4710000.chkpt at step 4710000
Episode 40000 - Step 4711409 - Epsilon 0.3079393603427998 - Mean Reward 877.55 - Mean Length 183.14 - Mean Loss 1.751 - Mean Q Value 46.764 - Time Delta 39.259Time 2023-05-30T06:27:32




Episode 40000 - Step 4715182 - Epsilon 0.3076490334517718 - Mean Reward 893.53 - Mean Length 185.77 - Mean Loss 1.778 - Mean Q Value 47.154 - Time Delta 40.589Time 2023-05-30T06:28:13




Episode 40000 - Step 4718851 - Epsilon 0.30736697172087807 - Mean Reward 873.67 - Mean Length 185.87 - Mean Loss 1.785 - Mean Q Value 47.325 - Time Delta 39.633Time 2023-05-30T06:28:53




MarioNet saved to ./checkpoints_pytorch/ mario_net_4720000.chkpt at step 4720000
Episode 40000 - Step 4722195 - Epsilon 0.3071101202792385 - Mean Reward 875.42 - Mean Length 185.53 - Mean Loss 1.814 - Mean Q Value 47.371 - Time Delta 35.938Time 2023-05-30T06:29:29




Episode 40000 - Step 4726190 - Epsilon 0.3068035471286958 - Mean Reward 854.67 - Mean Length 185.11 - Mean Loss 1.834 - Mean Q Value 47.381 - Time Delta 43.198Time 2023-05-30T06:30:12




Episode 40000 - Step 4728977 - Epsilon 0.3065898561837207 - Mean Reward 806.98 - Mean Length 175.68 - Mean Loss 1.846 - Mean Q Value 47.356 - Time Delta 29.853Time 2023-05-30T06:30:42




MarioNet saved to ./checkpoints_pytorch/ mario_net_4730000.chkpt at step 4730000
Episode 40000 - Step 4732884 - Epsilon 0.3062905407062305 - Mean Reward 783.89 - Mean Length 177.02 - Mean Loss 1.901 - Mean Q Value 47.185 - Time Delta 41.643Time 2023-05-30T06:31:23




Episode 40000 - Step 4738589 - Epsilon 0.30585400514634187 - Mean Reward 775.39 - Mean Length 197.38 - Mean Loss 1.944 - Mean Q Value 46.762 - Time Delta 61.257Time 2023-05-30T06:32:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_4740000.chkpt at step 4740000
Episode 40000 - Step 4743437 - Epsilon 0.30548353459624566 - Mean Reward 792.82 - Mean Length 212.42 - Mean Loss 1.986 - Mean Q Value 45.898 - Time Delta 51.827Time 2023-05-30T06:33:16




Episode 40000 - Step 4746841 - Epsilon 0.30522367866016875 - Mean Reward 789.74 - Mean Length 206.51 - Mean Loss 2.065 - Mean Q Value 45.025 - Time Delta 36.124Time 2023-05-30T06:33:52




MarioNet saved to ./checkpoints_pytorch/ mario_net_4750000.chkpt at step 4750000
Episode 40000 - Step 4750902 - Epsilon 0.3049139575304425 - Mean Reward 828.73 - Mean Length 219.25 - Mean Loss 2.149 - Mean Q Value 44.014 - Time Delta 43.714Time 2023-05-30T06:34:36




Episode 40000 - Step 4756718 - Epsilon 0.3044709347363755 - Mean Reward 850.68 - Mean Length 238.34 - Mean Loss 2.166 - Mean Q Value 42.706 - Time Delta 62.515Time 2023-05-30T06:35:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_4760000.chkpt at step 4760000
Episode 40000 - Step 4760777 - Epsilon 0.30416212952325056 - Mean Reward 856.06 - Mean Length 221.88 - Mean Loss 2.196 - Mean Q Value 41.443 - Time Delta 43.781Time 2023-05-30T06:36:22




Episode 40000 - Step 4766202 - Epsilon 0.30374988919715573 - Mean Reward 849.67 - Mean Length 227.65 - Mean Loss 2.155 - Mean Q Value 40.357 - Time Delta 57.569Time 2023-05-30T06:37:20




Episode 40000 - Step 4769781 - Epsilon 0.3034782305012345 - Mean Reward 863.42 - Mean Length 229.4 - Mean Loss 2.075 - Mean Q Value 39.276 - Time Delta 38.296Time 2023-05-30T06:37:58




MarioNet saved to ./checkpoints_pytorch/ mario_net_4770000.chkpt at step 4770000
Episode 40000 - Step 4775511 - Epsilon 0.30304380911028733 - Mean Reward 862.07 - Mean Length 246.09 - Mean Loss 1.963 - Mean Q Value 38.291 - Time Delta 61.275Time 2023-05-30T06:39:00




MarioNet saved to ./checkpoints_pytorch/ mario_net_4780000.chkpt at step 4780000
Episode 40000 - Step 4781225 - Epsilon 0.30261122002515634 - Mean Reward 851.62 - Mean Length 245.07 - Mean Loss 1.87 - Mean Q Value 37.613 - Time Delta 60.608Time 2023-05-30T06:40:00




Episode 40000 - Step 4784380 - Epsilon 0.3023726295020098 - Mean Reward 834.31 - Mean Length 236.03 - Mean Loss 1.775 - Mean Q Value 37.119 - Time Delta 34.332Time 2023-05-30T06:40:35




Episode 40000 - Step 4787044 - Epsilon 0.30217131635028915 - Mean Reward 801.87 - Mean Length 208.42 - Mean Loss 1.7 - Mean Q Value 37.094 - Time Delta 28.926Time 2023-05-30T06:41:03




MarioNet saved to ./checkpoints_pytorch/ mario_net_4790000.chkpt at step 4790000
Episode 40000 - Step 4790156 - Epsilon 0.301936318462793 - Mean Reward 787.66 - Mean Length 203.75 - Mean Loss 1.648 - Mean Q Value 37.064 - Time Delta 33.085Time 2023-05-30T06:41:37




Episode 40000 - Step 4793882 - Epsilon 0.30165519570021904 - Mean Reward 798.74 - Mean Length 183.71 - Mean Loss 1.655 - Mean Q Value 37.282 - Time Delta 40.245Time 2023-05-30T06:42:17




Episode 40000 - Step 4796902 - Epsilon 0.3014275319528534 - Mean Reward 783.74 - Mean Length 156.77 - Mean Loss 1.649 - Mean Q Value 37.985 - Time Delta 32.415Time 2023-05-30T06:42:49




MarioNet saved to ./checkpoints_pytorch/ mario_net_4800000.chkpt at step 4800000
Episode 40000 - Step 4800588 - Epsilon 0.3011498943885693 - Mean Reward 793.4 - Mean Length 162.08 - Mean Loss 1.65 - Mean Q Value 38.727 - Time Delta 39.362Time 2023-05-30T06:43:29




Episode 40000 - Step 4807688 - Epsilon 0.30061582738388154 - Mean Reward 819.74 - Mean Length 206.44 - Mean Loss 1.655 - Mean Q Value 39.393 - Time Delta 76.219Time 2023-05-30T06:44:45




MarioNet saved to ./checkpoints_pytorch/ mario_net_4810000.chkpt at step 4810000
Episode 40000 - Step 4810663 - Epsilon 0.30039232745863353 - Mean Reward 821.26 - Mean Length 205.07 - Mean Loss 1.658 - Mean Q Value 39.817 - Time Delta 31.535Time 2023-05-30T06:45:16




Episode 40000 - Step 4814246 - Epsilon 0.3001233714743018 - Mean Reward 832.41 - Mean Length 203.64 - Mean Loss 1.598 - Mean Q Value 40.185 - Time Delta 37.877Time 2023-05-30T06:45:54




Episode 40000 - Step 4817313 - Epsilon 0.2998933400500055 - Mean Reward 823.81 - Mean Length 204.11 - Mean Loss 1.567 - Mean Q Value 40.58 - Time Delta 33.195Time 2023-05-30T06:46:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_4820000.chkpt at step 4820000
Episode 40000 - Step 4821465 - Epsilon 0.29958221222737313 - Mean Reward 867.5 - Mean Length 208.77 - Mean Loss 1.546 - Mean Q Value 41.23 - Time Delta 44.907Time 2023-05-30T06:47:12




Episode 40000 - Step 4825346 - Epsilon 0.2992916835151371 - Mean Reward 911.91 - Mean Length 176.58 - Mean Loss 1.52 - Mean Q Value 42.0 - Time Delta 42.0Time 2023-05-30T06:47:54




Episode 40000 - Step 4829174 - Epsilon 0.2990053983474894 - Mean Reward 958.12 - Mean Length 185.11 - Mean Loss 1.494 - Mean Q Value 43.165 - Time Delta 41.472Time 2023-05-30T06:48:36




MarioNet saved to ./checkpoints_pytorch/ mario_net_4830000.chkpt at step 4830000
Episode 40000 - Step 4833736 - Epsilon 0.29866457703831933 - Mean Reward 920.77 - Mean Length 194.9 - Mean Loss 1.496 - Mean Q Value 44.114 - Time Delta 48.804Time 2023-05-30T06:49:25




Episode 40000 - Step 4839579 - Epsilon 0.29822862119162474 - Mean Reward 968.3 - Mean Length 222.66 - Mean Loss 1.473 - Mean Q Value 44.028 - Time Delta 62.735Time 2023-05-30T06:50:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_4840000.chkpt at step 4840000
Episode 40000 - Step 4842845 - Epsilon 0.2979852168752972 - Mean Reward 936.65 - Mean Length 213.8 - Mean Loss 1.467 - Mean Q Value 43.711 - Time Delta 34.787Time 2023-05-30T06:51:02




Episode 40000 - Step 4846027 - Epsilon 0.29774826386617576 - Mean Reward 887.53 - Mean Length 206.81 - Mean Loss 1.46 - Mean Q Value 43.419 - Time Delta 33.304Time 2023-05-30T06:51:35




Episode 40000 - Step 4849835 - Epsilon 0.2974649423659523 - Mean Reward 871.9 - Mean Length 206.61 - Mean Loss 1.482 - Mean Q Value 43.598 - Time Delta 40.816Time 2023-05-30T06:52:16




MarioNet saved to ./checkpoints_pytorch/ mario_net_4850000.chkpt at step 4850000
Episode 40000 - Step 4853188 - Epsilon 0.29721569682645466 - Mean Reward 882.46 - Mean Length 194.52 - Mean Loss 1.511 - Mean Q Value 43.834 - Time Delta 36.292Time 2023-05-30T06:52:53




Episode 40000 - Step 4856011 - Epsilon 0.2970060108238312 - Mean Reward 835.79 - Mean Length 164.32 - Mean Loss 1.549 - Mean Q Value 44.693 - Time Delta 30.41Time 2023-05-30T06:53:23




MarioNet saved to ./checkpoints_pytorch/ mario_net_4860000.chkpt at step 4860000
Episode 40000 - Step 4860337 - Epsilon 0.29668497241576175 - Mean Reward 835.66 - Mean Length 174.92 - Mean Loss 1.581 - Mean Q Value 45.391 - Time Delta 46.303Time 2023-05-30T06:54:09




Episode 40000 - Step 4864524 - Epsilon 0.29637457986208826 - Mean Reward 830.32 - Mean Length 184.97 - Mean Loss 1.639 - Mean Q Value 45.653 - Time Delta 45.077Time 2023-05-30T06:54:54




Episode 40000 - Step 4867920 - Epsilon 0.296123064595649 - Mean Reward 800.93 - Mean Length 180.85 - Mean Loss 1.663 - Mean Q Value 45.254 - Time Delta 36.174Time 2023-05-30T06:55:31




MarioNet saved to ./checkpoints_pytorch/ mario_net_4870000.chkpt at step 4870000
Episode 40000 - Step 4871167 - Epsilon 0.29588278420489333 - Mean Reward 807.94 - Mean Length 179.79 - Mean Loss 1.663 - Mean Q Value 44.755 - Time Delta 35.071Time 2023-05-30T06:56:06




Episode 40000 - Step 4875224 - Epsilon 0.2955828371898223 - Mean Reward 838.51 - Mean Length 192.13 - Mean Loss 1.706 - Mean Q Value 44.413 - Time Delta 43.402Time 2023-05-30T06:56:49




MarioNet saved to ./checkpoints_pytorch/ mario_net_4880000.chkpt at step 4880000
Episode 40000 - Step 4880068 - Epsilon 0.29522510298100124 - Mean Reward 845.02 - Mean Length 197.31 - Mean Loss 1.716 - Mean Q Value 44.498 - Time Delta 51.033Time 2023-05-30T06:57:40




Episode 40000 - Step 4883737 - Epsilon 0.29495443187693243 - Mean Reward 868.63 - Mean Length 192.13 - Mean Loss 1.731 - Mean Q Value 44.672 - Time Delta 39.737Time 2023-05-30T06:58:20




Episode 40000 - Step 4886813 - Epsilon 0.29472769908037866 - Mean Reward 850.42 - Mean Length 188.93 - Mean Loss 1.749 - Mean Q Value 44.871 - Time Delta 33.475Time 2023-05-30T06:58:53




MarioNet saved to ./checkpoints_pytorch/ mario_net_4890000.chkpt at step 4890000
Episode 40000 - Step 4890106 - Epsilon 0.2944851643188292 - Mean Reward 843.44 - Mean Length 189.39 - Mean Loss 1.747 - Mean Q Value 45.167 - Time Delta 35.853Time 2023-05-30T06:59:29




Episode 40000 - Step 4893453 - Epsilon 0.29423885689000395 - Mean Reward 817.3 - Mean Length 182.29 - Mean Loss 1.76 - Mean Q Value 45.324 - Time Delta 36.277Time 2023-05-30T07:00:05




Episode 40000 - Step 4896492 - Epsilon 0.2940153937893119 - Mean Reward 783.5 - Mean Length 164.24 - Mean Loss 1.752 - Mean Q Value 45.048 - Time Delta 32.813Time 2023-05-30T07:00:38




MarioNet saved to ./checkpoints_pytorch/ mario_net_4900000.chkpt at step 4900000
Episode 40000 - Step 4901260 - Epsilon 0.2936651361910857 - Mean Reward 757.2 - Mean Length 175.23 - Mean Loss 1.721 - Mean Q Value 44.955 - Time Delta 50.987Time 2023-05-30T07:01:29




Episode 40000 - Step 4907384 - Epsilon 0.29321587880563765 - Mean Reward 765.93 - Mean Length 205.71 - Mean Loss 1.698 - Mean Q Value 44.54 - Time Delta 65.692Time 2023-05-30T07:02:35




MarioNet saved to ./checkpoints_pytorch/ mario_net_4910000.chkpt at step 4910000
Episode 40000 - Step 4910370 - Epsilon 0.2929970748034791 - Mean Reward 753.59 - Mean Length 202.64 - Mean Loss 1.677 - Mean Q Value 43.724 - Time Delta 32.016Time 2023-05-30T07:03:07




Episode 40000 - Step 4916445 - Epsilon 0.2925524231832709 - Mean Reward 749.51 - Mean Length 229.92 - Mean Loss 1.62 - Mean Q Value 42.882 - Time Delta 64.174Time 2023-05-30T07:04:11




MarioNet saved to ./checkpoints_pytorch/ mario_net_4920000.chkpt at step 4920000
Episode 40000 - Step 4920529 - Epsilon 0.2922538795542987 - Mean Reward 787.58 - Mean Length 240.37 - Mean Loss 1.606 - Mean Q Value 42.033 - Time Delta 44.313Time 2023-05-30T07:04:55




Episode 40000 - Step 4926112 - Epsilon 0.29184625069106895 - Mean Reward 819.36 - Mean Length 248.52 - Mean Loss 1.581 - Mean Q Value 41.039 - Time Delta 59.823Time 2023-05-30T07:05:55




MarioNet saved to ./checkpoints_pytorch/ mario_net_4930000.chkpt at step 4930000
Episode 40000 - Step 4931311 - Epsilon 0.29146716988779797 - Mean Reward 836.8 - Mean Length 239.27 - Mean Loss 1.528 - Mean Q Value 40.116 - Time Delta 56.568Time 2023-05-30T07:06:52




Episode 40000 - Step 4934860 - Epsilon 0.29120868029837044 - Mean Reward 855.39 - Mean Length 244.9 - Mean Loss 1.509 - Mean Q Value 39.228 - Time Delta 37.842Time 2023-05-30T07:07:30




Episode 40000 - Step 4937931 - Epsilon 0.2909851906091865 - Mean Reward 845.89 - Mean Length 214.86 - Mean Loss 1.481 - Mean Q Value 38.415 - Time Delta 33.009Time 2023-05-30T07:08:03




MarioNet saved to ./checkpoints_pytorch/ mario_net_4940000.chkpt at step 4940000
Episode 40000 - Step 4943730 - Epsilon 0.29056364042137406 - Mean Reward 832.45 - Mean Length 232.01 - Mean Loss 1.472 - Mean Q Value 37.662 - Time Delta 62.12Time 2023-05-30T07:09:05




Episode 40000 - Step 4948171 - Epsilon 0.29024122111538697 - Mean Reward 803.9 - Mean Length 220.59 - Mean Loss 1.498 - Mean Q Value 37.212 - Time Delta 46.833Time 2023-05-30T07:09:52




MarioNet saved to ./checkpoints_pytorch/ mario_net_4950000.chkpt at step 4950000
Episode 40000 - Step 4952631 - Epsilon 0.28991778246392375 - Mean Reward 824.29 - Mean Length 213.2 - Mean Loss 1.588 - Mean Q Value 37.026 - Time Delta 48.239Time 2023-05-30T07:10:40




Episode 40000 - Step 4956297 - Epsilon 0.2896521945073316 - Mean Reward 807.91 - Mean Length 214.37 - Mean Loss 1.67 - Mean Q Value 37.192 - Time Delta 39.254Time 2023-05-30T07:11:19




Episode 40000 - Step 4959608 - Epsilon 0.28941253407648276 - Mean Reward 802.06 - Mean Length 216.77 - Mean Loss 1.754 - Mean Q Value 37.497 - Time Delta 35.437Time 2023-05-30T07:11:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_4960000.chkpt at step 4960000
Episode 40000 - Step 4964337 - Epsilon 0.28907057824375043 - Mean Reward 817.59 - Mean Length 206.07 - Mean Loss 1.813 - Mean Q Value 37.82 - Time Delta 50.851Time 2023-05-30T07:12:45




Episode 40000 - Step 4967766 - Epsilon 0.2888228786449641 - Mean Reward 837.3 - Mean Length 195.95 - Mean Loss 1.848 - Mean Q Value 38.027 - Time Delta 36.565Time 2023-05-30T07:13:22




MarioNet saved to ./checkpoints_pytorch/ mario_net_4970000.chkpt at step 4970000
Episode 40000 - Step 4971205 - Epsilon 0.28857466985802865 - Mean Reward 809.98 - Mean Length 185.74 - Mean Loss 1.85 - Mean Q Value 38.4 - Time Delta 37.036Time 2023-05-30T07:13:59




Episode 40000 - Step 4975517 - Epsilon 0.2882637539388038 - Mean Reward 833.64 - Mean Length 192.2 - Mean Loss 1.838 - Mean Q Value 39.042 - Time Delta 46.295Time 2023-05-30T07:14:45




Episode 40000 - Step 4979028 - Epsilon 0.2880108414103574 - Mean Reward 866.29 - Mean Length 194.2 - Mean Loss 1.807 - Mean Q Value 39.424 - Time Delta 37.267Time 2023-05-30T07:15:22




MarioNet saved to ./checkpoints_pytorch/ mario_net_4980000.chkpt at step 4980000
Episode 40000 - Step 4983553 - Epsilon 0.2876852133233751 - Mean Reward 852.79 - Mean Length 192.16 - Mean Loss 1.757 - Mean Q Value 39.902 - Time Delta 48.226Time 2023-05-30T07:16:11




Episode 40000 - Step 4987342 - Epsilon 0.28741283249770433 - Mean Reward 836.98 - Mean Length 195.76 - Mean Loss 1.693 - Mean Q Value 40.489 - Time Delta 41.052Time 2023-05-30T07:16:52




Episode 40000 - Step 4989746 - Epsilon 0.28724014926027297 - Mean Reward 792.38 - Mean Length 185.41 - Mean Loss 1.652 - Mean Q Value 41.155 - Time Delta 25.774Time 2023-05-30T07:17:18




MarioNet saved to ./checkpoints_pytorch/ mario_net_4990000.chkpt at step 4990000
Episode 40000 - Step 4992430 - Epsilon 0.2870474757452017 - Mean Reward 755.56 - Mean Length 169.13 - Mean Loss 1.627 - Mean Q Value 41.654 - Time Delta 28.881Time 2023-05-30T07:17:46




Episode 40000 - Step 4995206 - Epsilon 0.28684833388233444 - Mean Reward 717.76 - Mean Length 161.78 - Mean Loss 1.613 - Mean Q Value 42.272 - Time Delta 30.13Time 2023-05-30T07:18:17




Episode 40000 - Step 4999150 - Epsilon 0.28656564078035207 - Mean Reward 709.6 - Mean Length 155.97 - Mean Loss 1.631 - Mean Q Value 42.889 - Time Delta 42.085Time 2023-05-30T07:18:59




MarioNet saved to ./checkpoints_pytorch/ mario_net_5000000.chkpt at step 5000000
Episode 40000 - Step 5003870 - Epsilon 0.2862276927105154 - Mean Reward 725.91 - Mean Length 165.28 - Mean Loss 1.686 - Mean Q Value 43.385 - Time Delta 50.713Time 2023-05-30T07:19:49




Episode 40000 - Step 5007301 - Epsilon 0.2859822861401681 - Mean Reward 755.45 - Mean Length 175.55 - Mean Loss 1.707 - Mean Q Value 43.643 - Time Delta 36.868Time 2023-05-30T07:20:26




MarioNet saved to ./checkpoints_pytorch/ mario_net_5010000.chkpt at step 5010000
Episode 40000 - Step 5010577 - Epsilon 0.28574816250523594 - Mean Reward 757.62 - Mean Length 181.47 - Mean Loss 1.739 - Mean Q Value 43.606 - Time Delta 34.975Time 2023-05-30T07:21:01




Episode 40000 - Step 5013948 - Epsilon 0.2855074496557863 - Mean Reward 778.54 - Mean Length 187.42 - Mean Loss 1.803 - Mean Q Value 43.573 - Time Delta 35.468Time 2023-05-30T07:21:37




Episode 40000 - Step 5017982 - Epsilon 0.285219660498868 - Mean Reward 773.16 - Mean Length 188.32 - Mean Loss 1.829 - Mean Q Value 43.539 - Time Delta 43.208Time 2023-05-30T07:22:20




MarioNet saved to ./checkpoints_pytorch/ mario_net_5020000.chkpt at step 5020000
Episode 40000 - Step 5022019 - Epsilon 0.2849319477319786 - Mean Reward 736.48 - Mean Length 181.49 - Mean Loss 1.841 - Mean Q Value 43.209 - Time Delta 43.625Time 2023-05-30T07:23:04




Episode 40000 - Step 5025440 - Epsilon 0.28468836383061236 - Mean Reward 720.2 - Mean Length 181.39 - Mean Loss 1.863 - Mean Q Value 42.841 - Time Delta 36.42Time 2023-05-30T07:23:40




Episode 40000 - Step 5028614 - Epsilon 0.2844625531878751 - Mean Reward 697.77 - Mean Length 180.37 - Mean Loss 1.876 - Mean Q Value 42.662 - Time Delta 34.377Time 2023-05-30T07:24:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_5030000.chkpt at step 5030000
Episode 40000 - Step 5031612 - Epsilon 0.28424942835609834 - Mean Reward 686.09 - Mean Length 176.64 - Mean Loss 1.908 - Mean Q Value 42.279 - Time Delta 32.2Time 2023-05-30T07:24:46




Episode 40000 - Step 5035888 - Epsilon 0.28394572803591955 - Mean Reward 720.25 - Mean Length 179.06 - Mean Loss 1.946 - Mean Q Value 41.629 - Time Delta 45.708Time 2023-05-30T07:25:32




Episode 40000 - Step 5039285 - Epsilon 0.28370468946171384 - Mean Reward 751.7 - Mean Length 172.66 - Mean Loss 1.942 - Mean Q Value 41.328 - Time Delta 36.807Time 2023-05-30T07:26:09




MarioNet saved to ./checkpoints_pytorch/ mario_net_5040000.chkpt at step 5040000
Episode 40000 - Step 5042617 - Epsilon 0.28346846182829954 - Mean Reward 787.17 - Mean Length 171.77 - Mean Loss 1.966 - Mean Q Value 41.087 - Time Delta 32.428Time 2023-05-30T07:26:41




Episode 40000 - Step 5047478 - Epsilon 0.28312418596996813 - Mean Reward 843.12 - Mean Length 188.64 - Mean Loss 1.957 - Mean Q Value 40.784 - Time Delta 51.145Time 2023-05-30T07:27:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_5050000.chkpt at step 5050000
Episode 40000 - Step 5054132 - Epsilon 0.2826536003458289 - Mean Reward 890.55 - Mean Length 225.2 - Mean Loss 1.9 - Mean Q Value 40.598 - Time Delta 71.846Time 2023-05-30T07:28:44




Episode 40000 - Step 5056876 - Epsilon 0.28245976644436505 - Mean Reward 823.48 - Mean Length 209.88 - Mean Loss 1.849 - Mean Q Value 40.296 - Time Delta 29.64Time 2023-05-30T07:29:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_5060000.chkpt at step 5060000
Episode 40000 - Step 5060190 - Epsilon 0.2822258454137623 - Mean Reward 814.36 - Mean Length 209.05 - Mean Loss 1.821 - Mean Q Value 39.865 - Time Delta 35.64Time 2023-05-30T07:29:50




Episode 40000 - Step 5065510 - Epsilon 0.281850734496474 - Mean Reward 807.92 - Mean Length 228.93 - Mean Loss 1.78 - Mean Q Value 39.714 - Time Delta 57.25Time 2023-05-30T07:30:47




Episode 40000 - Step 5069945 - Epsilon 0.28153840563482735 - Mean Reward 802.32 - Mean Length 224.67 - Mean Loss 1.724 - Mean Q Value 39.375 - Time Delta 47.166Time 2023-05-30T07:31:34




MarioNet saved to ./checkpoints_pytorch/ mario_net_5070000.chkpt at step 5070000
Episode 40000 - Step 5074162 - Epsilon 0.2812417501355698 - Mean Reward 776.0 - Mean Length 200.3 - Mean Loss 1.667 - Mean Q Value 39.025 - Time Delta 45.583Time 2023-05-30T07:32:20




Episode 40000 - Step 5077768 - Epsilon 0.28098832491469056 - Mean Reward 820.72 - Mean Length 208.92 - Mean Loss 1.608 - Mean Q Value 38.961 - Time Delta 38.476Time 2023-05-30T07:32:58




MarioNet saved to ./checkpoints_pytorch/ mario_net_5080000.chkpt at step 5080000
Episode 40000 - Step 5082302 - Epsilon 0.28067000505043055 - Mean Reward 846.54 - Mean Length 221.12 - Mean Loss 1.587 - Mean Q Value 38.763 - Time Delta 47.981Time 2023-05-30T07:33:46




Episode 40000 - Step 5086070 - Epsilon 0.2804057383617557 - Mean Reward 855.51 - Mean Length 205.6 - Mean Loss 1.552 - Mean Q Value 38.413 - Time Delta 40.5Time 2023-05-30T07:34:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_5090000.chkpt at step 5090000
Episode 40000 - Step 5090093 - Epsilon 0.28012386202744655 - Mean Reward 845.33 - Mean Length 201.48 - Mean Loss 1.559 - Mean Q Value 38.296 - Time Delta 43.591Time 2023-05-30T07:35:10




Episode 40000 - Step 5093266 - Epsilon 0.2799017418561581 - Mean Reward 851.54 - Mean Length 191.04 - Mean Loss 1.582 - Mean Q Value 38.462 - Time Delta 33.875Time 2023-05-30T07:35:44




Episode 40000 - Step 5097213 - Epsilon 0.279625684999784 - Mean Reward 850.63 - Mean Length 194.45 - Mean Loss 1.609 - Mean Q Value 38.953 - Time Delta 42.67Time 2023-05-30T07:36:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_5100000.chkpt at step 5100000
Episode 40000 - Step 5100472 - Epsilon 0.2793979527294635 - Mean Reward 812.9 - Mean Length 181.7 - Mean Loss 1.613 - Mean Q Value 39.61 - Time Delta 35.041Time 2023-05-30T07:37:02




Episode 40000 - Step 5104545 - Epsilon 0.27911360052389733 - Mean Reward 841.95 - Mean Length 184.75 - Mean Loss 1.632 - Mean Q Value 40.133 - Time Delta 43.324Time 2023-05-30T07:37:45




Episode 40000 - Step 5108589 - Epsilon 0.2788315592343674 - Mean Reward 853.23 - Mean Length 184.96 - Mean Loss 1.647 - Mean Q Value 41.082 - Time Delta 43.373Time 2023-05-30T07:38:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_5110000.chkpt at step 5110000
Episode 40000 - Step 5111584 - Epsilon 0.2786228622189699 - Mean Reward 837.37 - Mean Length 183.18 - Mean Loss 1.666 - Mean Q Value 41.94 - Time Delta 32.047Time 2023-05-30T07:39:01




Episode 40000 - Step 5115026 - Epsilon 0.27838321034104874 - Mean Reward 803.68 - Mean Length 178.13 - Mean Loss 1.692 - Mean Q Value 42.423 - Time Delta 36.23Time 2023-05-30T07:39:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_5120000.chkpt at step 5120000
Episode 40000 - Step 5120546 - Epsilon 0.27799930641737 - Mean Reward 802.38 - Mean Length 200.74 - Mean Loss 1.703 - Mean Q Value 42.87 - Time Delta 58.892Time 2023-05-30T07:40:36




Episode 40000 - Step 5123445 - Epsilon 0.2777978993885282 - Mean Reward 748.34 - Mean Length 189.0 - Mean Loss 1.681 - Mean Q Value 43.041 - Time Delta 31.185Time 2023-05-30T07:41:07




Episode 40000 - Step 5127243 - Epsilon 0.2775342554348408 - Mean Reward 745.13 - Mean Length 186.54 - Mean Loss 1.664 - Mean Q Value 42.79 - Time Delta 41.31Time 2023-05-30T07:41:48




MarioNet saved to ./checkpoints_pytorch/ mario_net_5130000.chkpt at step 5130000
Episode 40000 - Step 5130732 - Epsilon 0.2772922816963942 - Mean Reward 760.91 - Mean Length 191.48 - Mean Loss 1.634 - Mean Q Value 42.562 - Time Delta 37.923Time 2023-05-30T07:42:26




Episode 40000 - Step 5134611 - Epsilon 0.2770235078153814 - Mean Reward 792.38 - Mean Length 195.85 - Mean Loss 1.599 - Mean Q Value 42.216 - Time Delta 41.478Time 2023-05-30T07:43:08




Episode 40000 - Step 5137757 - Epsilon 0.27680571445769464 - Mean Reward 815.59 - Mean Length 172.11 - Mean Loss 1.575 - Mean Q Value 42.048 - Time Delta 33.353Time 2023-05-30T07:43:41




MarioNet saved to ./checkpoints_pytorch/ mario_net_5140000.chkpt at step 5140000
Episode 40000 - Step 5141386 - Epsilon 0.27655469632715884 - Mean Reward 860.2 - Mean Length 179.41 - Mean Loss 1.572 - Mean Q Value 42.18 - Time Delta 39.217Time 2023-05-30T07:44:20




Episode 40000 - Step 5146573 - Epsilon 0.2761963064011367 - Mean Reward 867.15 - Mean Length 193.3 - Mean Loss 1.552 - Mean Q Value 42.335 - Time Delta 55.447Time 2023-05-30T07:45:16




Episode 40000 - Step 5149653 - Epsilon 0.27598371707599817 - Mean Reward 856.43 - Mean Length 189.21 - Mean Loss 1.528 - Mean Q Value 42.166 - Time Delta 33.062Time 2023-05-30T07:45:49




MarioNet saved to ./checkpoints_pytorch/ mario_net_5150000.chkpt at step 5150000
Episode 40000 - Step 5153851 - Epsilon 0.2756942240667289 - Mean Reward 899.79 - Mean Length 192.4 - Mean Loss 1.519 - Mean Q Value 42.325 - Time Delta 45.34Time 2023-05-30T07:46:34




Episode 40000 - Step 5157226 - Epsilon 0.27546170514379065 - Mean Reward 912.24 - Mean Length 194.69 - Mean Loss 1.5 - Mean Q Value 42.444 - Time Delta 36.341Time 2023-05-30T07:47:10




MarioNet saved to ./checkpoints_pytorch/ mario_net_5160000.chkpt at step 5160000
Episode 40000 - Step 5160908 - Epsilon 0.2752082592788462 - Mean Reward 896.76 - Mean Length 195.22 - Mean Loss 1.49 - Mean Q Value 42.941 - Time Delta 39.542Time 2023-05-30T07:47:50




Episode 40000 - Step 5163656 - Epsilon 0.27501925611109135 - Mean Reward 877.07 - Mean Length 170.83 - Mean Loss 1.528 - Mean Q Value 43.656 - Time Delta 29.724Time 2023-05-30T07:48:20




Episode 40000 - Step 5166413 - Epsilon 0.2748297643761946 - Mean Reward 872.16 - Mean Length 167.6 - Mean Loss 1.532 - Mean Q Value 44.744 - Time Delta 29.856Time 2023-05-30T07:48:49




Episode 40000 - Step 5169704 - Epsilon 0.27460374115232355 - Mean Reward 831.35 - Mean Length 158.53 - Mean Loss 1.549 - Mean Q Value 45.795 - Time Delta 35.325Time 2023-05-30T07:49:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_5170000.chkpt at step 5170000
Episode 40000 - Step 5172723 - Epsilon 0.27439656214677594 - Mean Reward 815.55 - Mean Length 154.97 - Mean Loss 1.59 - Mean Q Value 46.763 - Time Delta 28.002Time 2023-05-30T07:49:53




Episode 40000 - Step 5176077 - Epsilon 0.27416657703536845 - Mean Reward 826.18 - Mean Length 151.69 - Mean Loss 1.596 - Mean Q Value 47.578 - Time Delta 36.027Time 2023-05-30T07:50:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_5180000.chkpt at step 5180000
Episode 40000 - Step 5180268 - Epsilon 0.27387946940312663 - Mean Reward 831.12 - Mean Length 166.12 - Mean Loss 1.556 - Mean Q Value 48.127 - Time Delta 44.383Time 2023-05-30T07:51:13




Episode 40000 - Step 5183545 - Epsilon 0.2736551855045876 - Mean Reward 829.03 - Mean Length 171.32 - Mean Loss 1.605 - Mean Q Value 48.31 - Time Delta 34.899Time 2023-05-30T07:51:48




Episode 40000 - Step 5186722 - Epsilon 0.273437921138846 - Mean Reward 832.18 - Mean Length 170.18 - Mean Loss 1.606 - Mean Q Value 48.675 - Time Delta 34.39Time 2023-05-30T07:52:23




MarioNet saved to ./checkpoints_pytorch/ mario_net_5190000.chkpt at step 5190000
Episode 40000 - Step 5190020 - Epsilon 0.2732125644607228 - Mean Reward 823.37 - Mean Length 172.97 - Mean Loss 1.579 - Mean Q Value 49.182 - Time Delta 35.432Time 2023-05-30T07:52:58




Episode 40000 - Step 5193123 - Epsilon 0.2730007019741168 - Mean Reward 791.76 - Mean Length 170.46 - Mean Loss 1.598 - Mean Q Value 49.423 - Time Delta 33.555Time 2023-05-30T07:53:31




Episode 40000 - Step 5196244 - Epsilon 0.2727877762282149 - Mean Reward 789.69 - Mean Length 159.76 - Mean Loss 1.622 - Mean Q Value 49.771 - Time Delta 33.888Time 2023-05-30T07:54:05




Episode 40000 - Step 5199965 - Epsilon 0.2725341333615669 - Mean Reward 802.17 - Mean Length 164.2 - Mean Loss 1.585 - Mean Q Value 50.159 - Time Delta 40.3Time 2023-05-30T07:54:46




MarioNet saved to ./checkpoints_pytorch/ mario_net_5200000.chkpt at step 5200000
Episode 40000 - Step 5202617 - Epsilon 0.2723535030940933 - Mean Reward 780.19 - Mean Length 158.95 - Mean Loss 1.633 - Mean Q Value 50.34 - Time Delta 28.371Time 2023-05-30T07:55:14




Episode 40000 - Step 5206556 - Epsilon 0.27208543496008547 - Mean Reward 827.98 - Mean Length 165.36 - Mean Loss 1.663 - Mean Q Value 50.26 - Time Delta 42.12Time 2023-05-30T07:55:56




Episode 40000 - Step 5209088 - Epsilon 0.27191325935753724 - Mean Reward 807.63 - Mean Length 159.65 - Mean Loss 1.648 - Mean Q Value 50.125 - Time Delta 27.212Time 2023-05-30T07:56:23




MarioNet saved to ./checkpoints_pytorch/ mario_net_5210000.chkpt at step 5210000
Episode 40000 - Step 5211905 - Epsilon 0.27172183183500964 - Mean Reward 796.71 - Mean Length 156.61 - Mean Loss 1.678 - Mean Q Value 49.995 - Time Delta 30.078Time 2023-05-30T07:56:53




Episode 40000 - Step 5217562 - Epsilon 0.27133782079409546 - Mean Reward 836.09 - Mean Length 175.97 - Mean Loss 1.715 - Mean Q Value 49.847 - Time Delta 59.886Time 2023-05-30T07:57:53




MarioNet saved to ./checkpoints_pytorch/ mario_net_5220000.chkpt at step 5220000
Episode 40000 - Step 5224120 - Epsilon 0.2708933268547521 - Mean Reward 859.77 - Mean Length 215.03 - Mean Loss 1.691 - Mean Q Value 49.256 - Time Delta 61.752Time 2023-05-30T07:58:55




Episode 40000 - Step 5229214 - Epsilon 0.2705485637343714 - Mean Reward 869.25 - Mean Length 226.58 - Mean Loss 1.68 - Mean Q Value 48.144 - Time Delta 37.91Time 2023-05-30T07:59:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_5230000.chkpt at step 5230000
Episode 40000 - Step 5233540 - Epsilon 0.27025612359160844 - Mean Reward 930.91 - Mean Length 244.52 - Mean Loss 1.698 - Mean Q Value 46.936 - Time Delta 35.149Time 2023-05-30T08:00:08




Episode 40000 - Step 5236688 - Epsilon 0.2700435156679272 - Mean Reward 943.62 - Mean Length 247.83 - Mean Loss 1.666 - Mean Q Value 45.63 - Time Delta 23.249Time 2023-05-30T08:00:31




MarioNet saved to ./checkpoints_pytorch/ mario_net_5240000.chkpt at step 5240000
Episode 40000 - Step 5240074 - Epsilon 0.2698150205275345 - Mean Reward 902.46 - Mean Length 225.12 - Mean Loss 1.643 - Mean Q Value 44.555 - Time Delta 25.083Time 2023-05-30T08:00:56




Episode 40000 - Step 5243971 - Epsilon 0.26955228121857344 - Mean Reward 921.33 - Mean Length 198.51 - Mean Loss 1.643 - Mean Q Value 43.705 - Time Delta 32.592Time 2023-05-30T08:01:29




Episode 40000 - Step 5248547 - Epsilon 0.26924408968945773 - Mean Reward 876.85 - Mean Length 193.33 - Mean Loss 1.64 - Mean Q Value 43.314 - Time Delta 34.89Time 2023-05-30T08:02:04




MarioNet saved to ./checkpoints_pytorch/ mario_net_5250000.chkpt at step 5250000
Episode 40000 - Step 5252324 - Epsilon 0.26898997591831575 - Mean Reward 840.32 - Mean Length 187.84 - Mean Loss 1.652 - Mean Q Value 42.791 - Time Delta 28.304Time 2023-05-30T08:02:32




Episode 40000 - Step 5258946 - Epsilon 0.2685450313619789 - Mean Reward 885.8 - Mean Length 222.58 - Mean Loss 1.654 - Mean Q Value 41.995 - Time Delta 48.185Time 2023-05-30T08:03:20




MarioNet saved to ./checkpoints_pytorch/ mario_net_5260000.chkpt at step 5260000
Episode 40000 - Step 5261996 - Epsilon 0.2683403437970018 - Mean Reward 881.2 - Mean Length 219.22 - Mean Loss 1.683 - Mean Q Value 41.2 - Time Delta 23.467Time 2023-05-30T08:03:44




Episode 40000 - Step 5265578 - Epsilon 0.26810015255075664 - Mean Reward 850.77 - Mean Length 216.07 - Mean Loss 1.68 - Mean Q Value 40.951 - Time Delta 28.771Time 2023-05-30T08:04:13




Episode 40000 - Step 5269165 - Epsilon 0.2678598414744681 - Mean Reward 848.1 - Mean Length 206.18 - Mean Loss 1.698 - Mean Q Value 41.006 - Time Delta 29.003Time 2023-05-30T08:04:42




MarioNet saved to ./checkpoints_pytorch/ mario_net_5270000.chkpt at step 5270000
Episode 40000 - Step 5273553 - Epsilon 0.2675661603051925 - Mean Reward 892.13 - Mean Length 212.29 - Mean Loss 1.697 - Mean Q Value 41.467 - Time Delta 33.704Time 2023-05-30T08:05:15




Episode 40000 - Step 5276876 - Epsilon 0.26734397199396287 - Mean Reward 854.71 - Mean Length 179.3 - Mean Loss 1.728 - Mean Q Value 42.225 - Time Delta 23.596Time 2023-05-30T08:05:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_5280000.chkpt at step 5280000
Episode 40000 - Step 5280153 - Epsilon 0.26712504010972726 - Mean Reward 859.81 - Mean Length 181.57 - Mean Loss 1.719 - Mean Q Value 42.886 - Time Delta 24.338Time 2023-05-30T08:06:03




Episode 40000 - Step 5284002 - Epsilon 0.26686812263697285 - Mean Reward 885.09 - Mean Length 184.24 - Mean Loss 1.722 - Mean Q Value 43.112 - Time Delta 26.322Time 2023-05-30T08:06:30




Episode 40000 - Step 5287126 - Epsilon 0.2666597799755199 - Mean Reward 874.9 - Mean Length 179.61 - Mean Loss 1.723 - Mean Q Value 43.256 - Time Delta 24.224Time 2023-05-30T08:06:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_5290000.chkpt at step 5290000
Episode 40000 - Step 5291106 - Epsilon 0.2663945854174288 - Mean Reward 849.58 - Mean Length 175.53 - Mean Loss 1.72 - Mean Q Value 43.472 - Time Delta 28.266Time 2023-05-30T08:07:22




Episode 40000 - Step 5295388 - Epsilon 0.266109562563773 - Mean Reward 863.51 - Mean Length 185.12 - Mean Loss 1.698 - Mean Q Value 43.943 - Time Delta 34.37Time 2023-05-30T08:07:57




MarioNet saved to ./checkpoints_pytorch/ mario_net_5300000.chkpt at step 5300000
Episode 40000 - Step 5300162 - Epsilon 0.2657921502146288 - Mean Reward 934.92 - Mean Length 200.09 - Mean Loss 1.692 - Mean Q Value 44.682 - Time Delta 35.754Time 2023-05-30T08:08:32




Episode 40000 - Step 5303294 - Epsilon 0.2655841163908484 - Mean Reward 914.31 - Mean Length 192.92 - Mean Loss 1.7 - Mean Q Value 45.571 - Time Delta 21.38Time 2023-05-30T08:08:54




Episode 40000 - Step 5306427 - Epsilon 0.26537617904975497 - Mean Reward 915.42 - Mean Length 193.01 - Mean Loss 1.715 - Mean Q Value 46.394 - Time Delta 22.825Time 2023-05-30T08:09:16




MarioNet saved to ./checkpoints_pytorch/ mario_net_5310000.chkpt at step 5310000
Episode 40000 - Step 5311727 - Epsilon 0.265024788416243 - Mean Reward 907.1 - Mean Length 206.21 - Mean Loss 1.716 - Mean Q Value 46.943 - Time Delta 43.092Time 2023-05-30T08:10:00




Episode 40000 - Step 5314569 - Epsilon 0.2648365551583012 - Mean Reward 865.69 - Mean Length 191.81 - Mean Loss 1.744 - Mean Q Value 47.071 - Time Delta 22.177Time 2023-05-30T08:10:22




Episode 40000 - Step 5317503 - Epsilon 0.26464236874736624 - Mean Reward 787.31 - Mean Length 173.41 - Mean Loss 1.754 - Mean Q Value 46.868 - Time Delta 21.953Time 2023-05-30T08:10:44




MarioNet saved to ./checkpoints_pytorch/ mario_net_5320000.chkpt at step 5320000
Episode 40000 - Step 5320905 - Epsilon 0.2644173860720369 - Mean Reward 800.55 - Mean Length 176.11 - Mean Loss 1.747 - Mean Q Value 46.617 - Time Delta 27.519Time 2023-05-30T08:11:11




Episode 40000 - Step 5324351 - Epsilon 0.2641896885602274 - Mean Reward 823.58 - Mean Length 179.24 - Mean Loss 1.739 - Mean Q Value 46.319 - Time Delta 25.332Time 2023-05-30T08:11:37




Episode 40000 - Step 5327278 - Epsilon 0.26399643844544934 - Mean Reward 818.03 - Mean Length 155.51 - Mean Loss 1.729 - Mean Q Value 46.322 - Time Delta 24.295Time 2023-05-30T08:12:01




MarioNet saved to ./checkpoints_pytorch/ mario_net_5330000.chkpt at step 5330000
Episode 40000 - Step 5331180 - Epsilon 0.2637390354562532 - Mean Reward 838.42 - Mean Length 166.11 - Mean Loss 1.74 - Mean Q Value 46.38 - Time Delta 29.953Time 2023-05-30T08:12:31




Episode 40000 - Step 5334909 - Epsilon 0.26349327928059135 - Mean Reward 852.09 - Mean Length 174.06 - Mean Loss 1.763 - Mean Q Value 46.444 - Time Delta 27.006Time 2023-05-30T08:12:58




Episode 40000 - Step 5339337 - Epsilon 0.26320175357319686 - Mean Reward 869.7 - Mean Length 184.32 - Mean Loss 1.785 - Mean Q Value 46.568 - Time Delta 35.57Time 2023-05-30T08:13:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_5340000.chkpt at step 5340000
Episode 40000 - Step 5343166 - Epsilon 0.2629499242141316 - Mean Reward 838.66 - Mean Length 188.15 - Mean Loss 1.826 - Mean Q Value 46.57 - Time Delta 30.817Time 2023-05-30T08:14:04




Episode 40000 - Step 5346710 - Epsilon 0.2627170537289967 - Mean Reward 867.34 - Mean Length 194.32 - Mean Loss 1.865 - Mean Q Value 46.28 - Time Delta 26.306Time 2023-05-30T08:14:31




MarioNet saved to ./checkpoints_pytorch/ mario_net_5350000.chkpt at step 5350000
Episode 40000 - Step 5351827 - Epsilon 0.2623811877700511 - Mean Reward 883.09 - Mean Length 206.47 - Mean Loss 1.868 - Mean Q Value 46.221 - Time Delta 36.291Time 2023-05-30T08:15:07




Episode 40000 - Step 5355609 - Epsilon 0.26213322356953245 - Mean Reward 893.84 - Mean Length 207.0 - Mean Loss 1.866 - Mean Q Value 46.253 - Time Delta 27.39Time 2023-05-30T08:15:34




Episode 40000 - Step 5359856 - Epsilon 0.2618550512854425 - Mean Reward 884.07 - Mean Length 205.19 - Mean Loss 1.849 - Mean Q Value 46.054 - Time Delta 33.608Time 2023-05-30T08:16:08




MarioNet saved to ./checkpoints_pytorch/ mario_net_5360000.chkpt at step 5360000
Episode 40000 - Step 5363351 - Epsilon 0.2616263553318567 - Mean Reward 891.72 - Mean Length 201.85 - Mean Loss 1.856 - Mean Q Value 45.69 - Time Delta 26.772Time 2023-05-30T08:16:35




Episode 40000 - Step 5366918 - Epsilon 0.2613931539944076 - Mean Reward 882.32 - Mean Length 202.08 - Mean Loss 1.824 - Mean Q Value 45.396 - Time Delta 26.373Time 2023-05-30T08:17:01




Episode 40000 - Step 5369976 - Epsilon 0.26119339527068286 - Mean Reward 847.39 - Mean Length 181.49 - Mean Loss 1.8 - Mean Q Value 45.049 - Time Delta 23.957Time 2023-05-30T08:17:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_5370000.chkpt at step 5370000
Episode 40000 - Step 5374037 - Epsilon 0.26092835320770885 - Mean Reward 845.28 - Mean Length 184.28 - Mean Loss 1.784 - Mean Q Value 44.714 - Time Delta 29.341Time 2023-05-30T08:17:54




Episode 40000 - Step 5378677 - Epsilon 0.2606258517648907 - Mean Reward 853.91 - Mean Length 188.21 - Mean Loss 1.77 - Mean Q Value 44.362 - Time Delta 41.598Time 2023-05-30T08:18:36




MarioNet saved to ./checkpoints_pytorch/ mario_net_5380000.chkpt at step 5380000
Episode 40000 - Step 5382322 - Epsilon 0.26038846460371573 - Mean Reward 859.21 - Mean Length 189.71 - Mean Loss 1.725 - Mean Q Value 44.237 - Time Delta 29.384Time 2023-05-30T08:19:05




Episode 40000 - Step 5385514 - Epsilon 0.2601807574691389 - Mean Reward 841.21 - Mean Length 185.96 - Mean Loss 1.723 - Mean Q Value 44.342 - Time Delta 27.563Time 2023-05-30T08:19:33




Episode 40000 - Step 5389117 - Epsilon 0.2599465051397657 - Mean Reward 876.65 - Mean Length 191.41 - Mean Loss 1.708 - Mean Q Value 44.378 - Time Delta 29.254Time 2023-05-30T08:20:02




MarioNet saved to ./checkpoints_pytorch/ mario_net_5390000.chkpt at step 5390000
Episode 40000 - Step 5393519 - Epsilon 0.2596605913280112 - Mean Reward 880.21 - Mean Length 194.82 - Mean Loss 1.714 - Mean Q Value 44.623 - Time Delta 31.513Time 2023-05-30T08:20:34




Episode 40000 - Step 5397590 - Epsilon 0.2593964561627144 - Mean Reward 854.23 - Mean Length 189.13 - Mean Loss 1.71 - Mean Q Value 44.883 - Time Delta 31.887Time 2023-05-30T08:21:05




MarioNet saved to ./checkpoints_pytorch/ mario_net_5400000.chkpt at step 5400000
Episode 40000 - Step 5401246 - Epsilon 0.2591594760885198 - Mean Reward 871.26 - Mean Length 189.24 - Mean Loss 1.718 - Mean Q Value 45.177 - Time Delta 27.781Time 2023-05-30T08:21:33




Episode 40000 - Step 5405159 - Epsilon 0.258906077263235 - Mean Reward 873.21 - Mean Length 196.45 - Mean Loss 1.735 - Mean Q Value 45.292 - Time Delta 32.637Time 2023-05-30T08:22:06




Episode 40000 - Step 5408982 - Epsilon 0.2586587459612124 - Mean Reward 879.27 - Mean Length 198.65 - Mean Loss 1.775 - Mean Q Value 45.439 - Time Delta 31.506Time 2023-05-30T08:22:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_5410000.chkpt at step 5410000
Episode 40000 - Step 5412870 - Epsilon 0.2584074517774487 - Mean Reward 857.81 - Mean Length 193.51 - Mean Loss 1.811 - Mean Q Value 45.104 - Time Delta 32.498Time 2023-05-30T08:23:10




Episode 40000 - Step 5416954 - Epsilon 0.2581437523776363 - Mean Reward 860.29 - Mean Length 193.64 - Mean Loss 1.861 - Mean Q Value 44.979 - Time Delta 29.6Time 2023-05-30T08:23:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_5420000.chkpt at step 5420000
Episode 40000 - Step 5421102 - Epsilon 0.2578761960248852 - Mean Reward 864.02 - Mean Length 198.56 - Mean Loss 1.87 - Mean Q Value 44.9 - Time Delta 32.544Time 2023-05-30T08:24:12




Episode 40000 - Step 5424399 - Epsilon 0.25766372911867597 - Mean Reward 859.95 - Mean Length 192.4 - Mean Loss 1.887 - Mean Q Value 44.762 - Time Delta 25.634Time 2023-05-30T08:24:38




MarioNet saved to ./checkpoints_pytorch/ mario_net_5430000.chkpt at step 5430000
Episode 40000 - Step 5430363 - Epsilon 0.25727983871193383 - Mean Reward 847.18 - Mean Length 213.81 - Mean Loss 1.876 - Mean Q Value 44.507 - Time Delta 46.388Time 2023-05-30T08:25:24




Episode 40000 - Step 5435127 - Epsilon 0.256973600786589 - Mean Reward 851.86 - Mean Length 222.57 - Mean Loss 1.84 - Mean Q Value 44.203 - Time Delta 35.091Time 2023-05-30T08:25:59




Episode 40000 - Step 5438714 - Epsilon 0.2567432629744108 - Mean Reward 843.77 - Mean Length 217.6 - Mean Loss 1.833 - Mean Q Value 43.734 - Time Delta 26.046Time 2023-05-30T08:26:25




MarioNet saved to ./checkpoints_pytorch/ mario_net_5440000.chkpt at step 5440000
Episode 40000 - Step 5441481 - Epsilon 0.2565657222140225 - Mean Reward 806.44 - Mean Length 203.79 - Mean Loss 1.84 - Mean Q Value 43.262 - Time Delta 19.993Time 2023-05-30T08:26:45




Episode 40000 - Step 5445459 - Epsilon 0.25631069440496324 - Mean Reward 829.08 - Mean Length 210.6 - Mean Loss 1.805 - Mean Q Value 42.757 - Time Delta 30.711Time 2023-05-30T08:27:16




Episode 40000 - Step 5448874 - Epsilon 0.2560919625067086 - Mean Reward 839.73 - Mean Length 185.11 - Mean Loss 1.759 - Mean Q Value 42.418 - Time Delta 28.044Time 2023-05-30T08:27:44




MarioNet saved to ./checkpoints_pytorch/ mario_net_5450000.chkpt at step 5450000
Episode 40000 - Step 5452444 - Epsilon 0.2558635023670848 - Mean Reward 852.16 - Mean Length 173.17 - Mean Loss 1.731 - Mean Q Value 42.434 - Time Delta 29.83Time 2023-05-30T08:28:14




Episode 40000 - Step 5455385 - Epsilon 0.25567544784545315 - Mean Reward 825.14 - Mean Length 166.71 - Mean Loss 1.681 - Mean Q Value 42.513 - Time Delta 25.883Time 2023-05-30T08:28:40




Episode 40000 - Step 5459252 - Epsilon 0.25542839301470255 - Mean Reward 840.99 - Mean Length 177.71 - Mean Loss 1.632 - Mean Q Value 42.67 - Time Delta 32.791Time 2023-05-30T08:29:12




MarioNet saved to ./checkpoints_pytorch/ mario_net_5460000.chkpt at step 5460000
Episode 40000 - Step 5462747 - Epsilon 0.2552053099020169 - Mean Reward 818.17 - Mean Length 172.88 - Mean Loss 1.604 - Mean Q Value 42.897 - Time Delta 29.187Time 2023-05-30T08:29:42




Episode 40000 - Step 5466548 - Epsilon 0.2549629162114916 - Mean Reward 822.45 - Mean Length 176.74 - Mean Loss 1.585 - Mean Q Value 43.129 - Time Delta 33.1Time 2023-05-30T08:30:15




Episode 40000 - Step 5469146 - Epsilon 0.254797371543148 - Mean Reward 770.53 - Mean Length 167.02 - Mean Loss 1.545 - Mean Q Value 43.425 - Time Delta 22.589Time 2023-05-30T08:30:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_5470000.chkpt at step 5470000
Episode 40000 - Step 5473010 - Epsilon 0.2545513560960432 - Mean Reward 809.61 - Mean Length 176.25 - Mean Loss 1.535 - Mean Q Value 43.938 - Time Delta 31.432Time 2023-05-30T08:31:09




Episode 40000 - Step 5476886 - Epsilon 0.25430481526945475 - Mean Reward 828.09 - Mean Length 176.34 - Mean Loss 1.505 - Mean Q Value 44.573 - Time Delta 31.285Time 2023-05-30T08:31:40




MarioNet saved to ./checkpoints_pytorch/ mario_net_5480000.chkpt at step 5480000
Episode 40000 - Step 5480846 - Epsilon 0.2540531780518237 - Mean Reward 846.95 - Mean Length 180.99 - Mean Loss 1.504 - Mean Q Value 45.27 - Time Delta 33.133Time 2023-05-30T08:32:13




Episode 40000 - Step 5484696 - Epsilon 0.2538087694778584 - Mean Reward 822.44 - Mean Length 181.48 - Mean Loss 1.526 - Mean Q Value 45.785 - Time Delta 32.311Time 2023-05-30T08:32:45




MarioNet saved to ./checkpoints_pytorch/ mario_net_5490000.chkpt at step 5490000
Episode 40000 - Step 5490814 - Epsilon 0.25342086564147986 - Mean Reward 884.0 - Mean Length 216.68 - Mean Loss 1.541 - Mean Q Value 45.85 - Time Delta 51.754Time 2023-05-30T08:33:37




Episode 40000 - Step 5495175 - Epsilon 0.25314472406718647 - Mean Reward 928.18 - Mean Length 221.65 - Mean Loss 1.527 - Mean Q Value 45.486 - Time Delta 38.02Time 2023-05-30T08:34:15




Episode 40000 - Step 5498199 - Epsilon 0.2529534189542959 - Mean Reward 904.69 - Mean Length 213.13 - Mean Loss 1.526 - Mean Q Value 44.963 - Time Delta 23.136Time 2023-05-30T08:34:38




MarioNet saved to ./checkpoints_pytorch/ mario_net_5500000.chkpt at step 5500000
Episode 40000 - Step 5501590 - Epsilon 0.25273906853717315 - Mean Reward 896.96 - Mean Length 207.44 - Mean Loss 1.544 - Mean Q Value 44.537 - Time Delta 30.291Time 2023-05-30T08:35:09




Episode 40000 - Step 5505114 - Epsilon 0.252516503444259 - Mean Reward 902.8 - Mean Length 204.18 - Mean Loss 1.548 - Mean Q Value 44.27 - Time Delta 33.185Time 2023-05-30T08:35:42




Episode 40000 - Step 5509283 - Epsilon 0.2522534551904601 - Mean Reward 909.98 - Mean Length 184.69 - Mean Loss 1.57 - Mean Q Value 44.258 - Time Delta 37.584Time 2023-05-30T08:36:19




MarioNet saved to ./checkpoints_pytorch/ mario_net_5510000.chkpt at step 5510000
Episode 40000 - Step 5512787 - Epsilon 0.25203257789445305 - Mean Reward 867.49 - Mean Length 176.12 - Mean Loss 1.612 - Mean Q Value 44.435 - Time Delta 27.237Time 2023-05-30T08:36:47




Episode 40000 - Step 5515627 - Epsilon 0.25185369825155085 - Mean Reward 862.22 - Mean Length 174.28 - Mean Loss 1.667 - Mean Q Value 44.716 - Time Delta 22.117Time 2023-05-30T08:37:09




Episode 40000 - Step 5518746 - Epsilon 0.2516573918506775 - Mean Reward 855.89 - Mean Length 171.56 - Mean Loss 1.664 - Mean Q Value 44.948 - Time Delta 30.437Time 2023-05-30T08:37:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_5520000.chkpt at step 5520000
Episode 40000 - Step 5521757 - Epsilon 0.25146802800602275 - Mean Reward 848.9 - Mean Length 166.43 - Mean Loss 1.68 - Mean Q Value 45.392 - Time Delta 25.41Time 2023-05-30T08:38:05




Episode 40000 - Step 5525351 - Epsilon 0.25124218542956855 - Mean Reward 851.09 - Mean Length 160.68 - Mean Loss 1.681 - Mean Q Value 45.886 - Time Delta 29.944Time 2023-05-30T08:38:35




Episode 40000 - Step 5528852 - Epsilon 0.25102238188482334 - Mean Reward 844.94 - Mean Length 160.65 - Mean Loss 1.654 - Mean Q Value 46.797 - Time Delta 27.939Time 2023-05-30T08:39:03




MarioNet saved to ./checkpoints_pytorch/ mario_net_5530000.chkpt at step 5530000
Episode 40000 - Step 5532518 - Epsilon 0.25079242523677947 - Mean Reward 869.28 - Mean Length 168.91 - Mean Loss 1.644 - Mean Q Value 47.803 - Time Delta 30.078Time 2023-05-30T08:39:33




Episode 40000 - Step 5535360 - Epsilon 0.25061430048266986 - Mean Reward 853.53 - Mean Length 166.14 - Mean Loss 1.655 - Mean Q Value 48.71 - Time Delta 28.342Time 2023-05-30T08:40:01




Episode 40000 - Step 5538685 - Epsilon 0.2504060638796599 - Mean Reward 858.91 - Mean Length 169.28 - Mean Loss 1.636 - Mean Q Value 49.433 - Time Delta 24.959Time 2023-05-30T08:40:26




MarioNet saved to ./checkpoints_pytorch/ mario_net_5540000.chkpt at step 5540000
Episode 40000 - Step 5543166 - Epsilon 0.2501257035176826 - Mean Reward 800.12 - Mean Length 178.15 - Mean Loss 1.656 - Mean Q Value 49.883 - Time Delta 34.65Time 2023-05-30T08:41:01




Episode 40000 - Step 5546268 - Epsilon 0.24993180620374222 - Mean Reward 763.61 - Mean Length 174.16 - Mean Loss 1.687 - Mean Q Value 49.687 - Time Delta 23.281Time 2023-05-30T08:41:24




MarioNet saved to ./checkpoints_pytorch/ mario_net_5550000.chkpt at step 5550000
Episode 40000 - Step 5550010 - Epsilon 0.2496981043009019 - Mean Reward 778.39 - Mean Length 174.92 - Mean Loss 1.704 - Mean Q Value 49.243 - Time Delta 28.749Time 2023-05-30T08:41:53




Episode 40000 - Step 5553558 - Epsilon 0.2494767202530218 - Mean Reward 799.88 - Mean Length 181.98 - Mean Loss 1.738 - Mean Q Value 48.953 - Time Delta 27.897Time 2023-05-30T08:42:20




Episode 40000 - Step 5556987 - Epsilon 0.24926294794907888 - Mean Reward 783.36 - Mean Length 183.02 - Mean Loss 1.763 - Mean Q Value 48.384 - Time Delta 29.956Time 2023-05-30T08:42:50




MarioNet saved to ./checkpoints_pytorch/ mario_net_5560000.chkpt at step 5560000
Episode 40000 - Step 5562640 - Epsilon 0.24891092585004354 - Mean Reward 816.73 - Mean Length 194.74 - Mean Loss 1.785 - Mean Q Value 47.573 - Time Delta 44.364Time 2023-05-30T08:43:35




Episode 40000 - Step 5566073 - Epsilon 0.24869738966803218 - Mean Reward 839.65 - Mean Length 198.05 - Mean Loss 1.797 - Mean Q Value 46.815 - Time Delta 26.466Time 2023-05-30T08:44:01




Episode 40000 - Step 5569214 - Epsilon 0.2485021766739001 - Mean Reward 815.99 - Mean Length 192.04 - Mean Loss 1.814 - Mean Q Value 45.931 - Time Delta 28.779Time 2023-05-30T08:44:30




MarioNet saved to ./checkpoints_pytorch/ mario_net_5570000.chkpt at step 5570000
Episode 40000 - Step 5572328 - Epsilon 0.24830879298952488 - Mean Reward 795.52 - Mean Length 187.7 - Mean Loss 1.806 - Mean Q Value 44.996 - Time Delta 28.634Time 2023-05-30T08:44:59




Episode 40000 - Step 5577194 - Epsilon 0.24800690896326263 - Mean Reward 802.45 - Mean Length 202.07 - Mean Loss 1.817 - Mean Q Value 44.161 - Time Delta 45.932Time 2023-05-30T08:45:45




MarioNet saved to ./checkpoints_pytorch/ mario_net_5580000.chkpt at step 5580000
Episode 40000 - Step 5580617 - Epsilon 0.24779476780721368 - Mean Reward 807.16 - Mean Length 179.77 - Mean Loss 1.807 - Mean Q Value 43.481 - Time Delta 26.388Time 2023-05-30T08:46:11




Episode 40000 - Step 5584968 - Epsilon 0.24752537555709236 - Mean Reward 842.91 - Mean Length 188.95 - Mean Loss 1.8 - Mean Q Value 43.348 - Time Delta 36.513Time 2023-05-30T08:46:47




Episode 40000 - Step 5588445 - Epsilon 0.24731030758493444 - Mean Reward 843.88 - Mean Length 192.31 - Mean Loss 1.77 - Mean Q Value 43.398 - Time Delta 26.539Time 2023-05-30T08:47:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_5590000.chkpt at step 5590000
Episode 40000 - Step 5591291 - Epsilon 0.24713440886246635 - Mean Reward 844.22 - Mean Length 189.63 - Mean Loss 1.77 - Mean Q Value 43.312 - Time Delta 23.199Time 2023-05-30T08:47:37




Episode 40000 - Step 5595648 - Epsilon 0.24686536422897856 - Mean Reward 854.26 - Mean Length 184.54 - Mean Loss 1.749 - Mean Q Value 43.352 - Time Delta 33.647Time 2023-05-30T08:48:11




Episode 40000 - Step 5599507 - Epsilon 0.24662731568617802 - Mean Reward 824.99 - Mean Length 188.9 - Mean Loss 1.719 - Mean Q Value 43.752 - Time Delta 33.533Time 2023-05-30T08:48:44




MarioNet saved to ./checkpoints_pytorch/ mario_net_5600000.chkpt at step 5600000
Episode 40000 - Step 5603008 - Epsilon 0.24641154953955732 - Mean Reward 802.3 - Mean Length 180.4 - Mean Loss 1.706 - Mean Q Value 44.017 - Time Delta 29.395Time 2023-05-30T08:49:14




Episode 40000 - Step 5605165 - Epsilon 0.24627870791548667 - Mean Reward 746.69 - Mean Length 167.2 - Mean Loss 1.699 - Mean Q Value 44.275 - Time Delta 15.402Time 2023-05-30T08:49:29




Episode 40000 - Step 5608418 - Epsilon 0.24607850315051788 - Mean Reward 737.37 - Mean Length 171.27 - Mean Loss 1.637 - Mean Q Value 44.535 - Time Delta 26.186Time 2023-05-30T08:49:55




MarioNet saved to ./checkpoints_pytorch/ mario_net_5610000.chkpt at step 5610000
Episode 40000 - Step 5612952 - Epsilon 0.24579973115618683 - Mean Reward 725.58 - Mean Length 173.04 - Mean Loss 1.61 - Mean Q Value 44.593 - Time Delta 37.112Time 2023-05-30T08:50:33




Episode 40000 - Step 5616689 - Epsilon 0.24557019996508406 - Mean Reward 781.71 - Mean Length 171.82 - Mean Loss 1.597 - Mean Q Value 44.624 - Time Delta 27.396Time 2023-05-30T08:51:00




MarioNet saved to ./checkpoints_pytorch/ mario_net_5620000.chkpt at step 5620000
Episode 40000 - Step 5621506 - Epsilon 0.24527465000852605 - Mean Reward 744.05 - Mean Length 184.98 - Mean Loss 1.581 - Mean Q Value 44.363 - Time Delta 36.991Time 2023-05-30T08:51:37




Episode 40000 - Step 5624595 - Epsilon 0.24508530975477869 - Mean Reward 791.77 - Mean Length 194.3 - Mean Loss 1.584 - Mean Q Value 43.984 - Time Delta 22.762Time 2023-05-30T08:52:00




Episode 40000 - Step 5628202 - Epsilon 0.24486440366506512 - Mean Reward 816.67 - Mean Length 197.84 - Mean Loss 1.616 - Mean Q Value 43.723 - Time Delta 29.473Time 2023-05-30T08:52:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_5630000.chkpt at step 5630000
Episode 40000 - Step 5631806 - Epsilon 0.24464388017051944 - Mean Reward 859.7 - Mean Length 188.54 - Mean Loss 1.618 - Mean Q Value 43.685 - Time Delta 29.987Time 2023-05-30T08:52:59




Episode 40000 - Step 5634899 - Epsilon 0.24445478238586704 - Mean Reward 821.61 - Mean Length 182.1 - Mean Loss 1.601 - Mean Q Value 43.68 - Time Delta 26.85Time 2023-05-30T08:53:26




Episode 40000 - Step 5638509 - Epsilon 0.2442342614422119 - Mean Reward 869.16 - Mean Length 170.03 - Mean Loss 1.597 - Mean Q Value 43.998 - Time Delta 28.232Time 2023-05-30T08:53:54




MarioNet saved to ./checkpoints_pytorch/ mario_net_5640000.chkpt at step 5640000
Episode 40000 - Step 5641867 - Epsilon 0.24402931279331613 - Mean Reward 875.08 - Mean Length 172.72 - Mean Loss 1.593 - Mean Q Value 44.652 - Time Delta 24.922Time 2023-05-30T08:54:19




Episode 40000 - Step 5644989 - Epsilon 0.2438389222004488 - Mean Reward 878.87 - Mean Length 167.87 - Mean Loss 1.595 - Mean Q Value 45.275 - Time Delta 24.53Time 2023-05-30T08:54:44




Episode 40000 - Step 5649037 - Epsilon 0.24359228200130836 - Mean Reward 865.01 - Mean Length 172.31 - Mean Loss 1.613 - Mean Q Value 45.836 - Time Delta 30.37Time 2023-05-30T08:55:14




MarioNet saved to ./checkpoints_pytorch/ mario_net_5650000.chkpt at step 5650000
Episode 40000 - Step 5652234 - Epsilon 0.24339766862829343 - Mean Reward 883.62 - Mean Length 173.35 - Mean Loss 1.663 - Mean Q Value 46.643 - Time Delta 25.076Time 2023-05-30T08:55:39




Episode 40000 - Step 5655741 - Epsilon 0.24318436321705345 - Mean Reward 884.29 - Mean Length 172.32 - Mean Loss 1.657 - Mean Q Value 47.422 - Time Delta 28.294Time 2023-05-30T08:56:07




Episode 40000 - Step 5659768 - Epsilon 0.24293966052664712 - Mean Reward 917.2 - Mean Length 179.01 - Mean Loss 1.679 - Mean Q Value 48.103 - Time Delta 31.667Time 2023-05-30T08:56:39




MarioNet saved to ./checkpoints_pytorch/ mario_net_5660000.chkpt at step 5660000
Episode 40000 - Step 5663441 - Epsilon 0.2427166835454388 - Mean Reward 916.0 - Mean Length 184.52 - Mean Loss 1.705 - Mean Q Value 49.028 - Time Delta 26.467Time 2023-05-30T08:57:06




Episode 40000 - Step 5667165 - Epsilon 0.24249081944084738 - Mean Reward 918.68 - Mean Length 181.28 - Mean Loss 1.74 - Mean Q Value 49.982 - Time Delta 30.876Time 2023-05-30T08:57:36




MarioNet saved to ./checkpoints_pytorch/ mario_net_5670000.chkpt at step 5670000
Episode 40000 - Step 5673492 - Epsilon 0.24210756272729644 - Mean Reward 970.32 - Mean Length 212.58 - Mean Loss 1.776 - Mean Q Value 50.359 - Time Delta 53.563Time 2023-05-30T08:58:30




Episode 40000 - Step 5676782 - Epsilon 0.24190851110324585 - Mean Reward 959.42 - Mean Length 210.41 - Mean Loss 1.845 - Mean Q Value 50.392 - Time Delta 28.845Time 2023-05-30T08:58:59




MarioNet saved to ./checkpoints_pytorch/ mario_net_5680000.chkpt at step 5680000
Episode 40000 - Step 5680194 - Epsilon 0.24170225109986107 - Mean Reward 923.41 - Mean Length 204.26 - Mean Loss 1.864 - Mean Q Value 50.214 - Time Delta 26.646Time 2023-05-30T08:59:25




Episode 40000 - Step 5685658 - Epsilon 0.2413723111838372 - Mean Reward 949.61 - Mean Length 222.17 - Mean Loss 1.882 - Mean Q Value 49.735 - Time Delta 42.365Time 2023-05-30T09:00:08




MarioNet saved to ./checkpoints_pytorch/ mario_net_5690000.chkpt at step 5690000
Episode 40000 - Step 5690132 - Epsilon 0.24110248714723553 - Mean Reward 926.54 - Mean Length 229.67 - Mean Loss 1.874 - Mean Q Value 48.675 - Time Delta 35.213Time 2023-05-30T09:00:43




Episode 40000 - Step 5693838 - Epsilon 0.24087920911447333 - Mean Reward 872.23 - Mean Length 203.46 - Mean Loss 1.834 - Mean Q Value 47.699 - Time Delta 28.65Time 2023-05-30T09:01:12




Episode 40000 - Step 5697238 - Epsilon 0.2406745487540896 - Mean Reward 872.49 - Mean Length 204.56 - Mean Loss 1.789 - Mean Q Value 46.902 - Time Delta 25.621Time 2023-05-30T09:01:37




MarioNet saved to ./checkpoints_pytorch/ mario_net_5700000.chkpt at step 5700000
Episode 40000 - Step 5700601 - Epsilon 0.24047228663976442 - Mean Reward 883.59 - Mean Length 204.07 - Mean Loss 1.766 - Mean Q Value 46.115 - Time Delta 24.217Time 2023-05-30T09:02:02




Episode 40000 - Step 5704395 - Epsilon 0.24024430678389608 - Mean Reward 885.63 - Mean Length 187.37 - Mean Loss 1.723 - Mean Q Value 45.405 - Time Delta 28.61Time 2023-05-30T09:02:30




Episode 40000 - Step 5708108 - Epsilon 0.24002140344924533 - Mean Reward 902.15 - Mean Length 179.76 - Mean Loss 1.711 - Mean Q Value 45.371 - Time Delta 30.858Time 2023-05-30T09:03:01




MarioNet saved to ./checkpoints_pytorch/ mario_net_5710000.chkpt at step 5710000
Episode 40000 - Step 5711189 - Epsilon 0.23983659812240063 - Mean Reward 877.21 - Mean Length 173.51 - Mean Loss 1.712 - Mean Q Value 45.702 - Time Delta 23.625Time 2023-05-30T09:03:25




Episode 40000 - Step 5715571 - Epsilon 0.23957400096015438 - Mean Reward 896.87 - Mean Length 183.33 - Mean Loss 1.733 - Mean Q Value 46.135 - Time Delta 36.913Time 2023-05-30T09:04:02




Episode 40000 - Step 5718865 - Epsilon 0.2393767929572252 - Mean Reward 886.75 - Mean Length 182.64 - Mean Loss 1.751 - Mean Q Value 46.375 - Time Delta 27.089Time 2023-05-30T09:04:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_5720000.chkpt at step 5720000
Episode 40000 - Step 5722549 - Epsilon 0.23915642839679271 - Mean Reward 887.77 - Mean Length 181.54 - Mean Loss 1.789 - Mean Q Value 46.677 - Time Delta 30.169Time 2023-05-30T09:04:59




Episode 40000 - Step 5727685 - Epsilon 0.23884954856334167 - Mean Reward 858.43 - Mean Length 195.77 - Mean Loss 1.808 - Mean Q Value 47.002 - Time Delta 43.827Time 2023-05-30T09:05:43




MarioNet saved to ./checkpoints_pytorch/ mario_net_5730000.chkpt at step 5730000
Episode 40000 - Step 5731482 - Epsilon 0.2386229281777253 - Mean Reward 853.8 - Mean Length 202.93 - Mean Loss 1.834 - Mean Q Value 47.153 - Time Delta 30.046Time 2023-05-30T09:06:13




Episode 40000 - Step 5735538 - Epsilon 0.23838108713242115 - Mean Reward 867.38 - Mean Length 199.67 - Mean Loss 1.861 - Mean Q Value 47.062 - Time Delta 34.019Time 2023-05-30T09:06:47




Episode 40000 - Step 5739153 - Epsilon 0.23816574751922245 - Mean Reward 888.11 - Mean Length 202.88 - Mean Loss 1.867 - Mean Q Value 47.166 - Time Delta 30.363Time 2023-05-30T09:07:17




MarioNet saved to ./checkpoints_pytorch/ mario_net_5740000.chkpt at step 5740000
Episode 40000 - Step 5741896 - Epsilon 0.23800248132372465 - Mean Reward 838.67 - Mean Length 193.47 - Mean Loss 1.87 - Mean Q Value 47.181 - Time Delta 21.382Time 2023-05-30T09:07:38




Episode 40000 - Step 5744825 - Epsilon 0.23782826777669344 - Mean Reward 841.72 - Mean Length 171.4 - Mean Loss 1.892 - Mean Q Value 47.037 - Time Delta 23.876Time 2023-05-30T09:08:02




Episode 40000 - Step 5748891 - Epsilon 0.23758663814132921 - Mean Reward 877.02 - Mean Length 174.09 - Mean Loss 1.882 - Mean Q Value 46.86 - Time Delta 31.757Time 2023-05-30T09:08:34




MarioNet saved to ./checkpoints_pytorch/ mario_net_5750000.chkpt at step 5750000
Episode 40000 - Step 5752394 - Epsilon 0.23737866269749047 - Mean Reward 832.66 - Mean Length 168.56 - Mean Loss 1.879 - Mean Q Value 46.802 - Time Delta 25.849Time 2023-05-30T09:09:00




Episode 40000 - Step 5755819 - Epsilon 0.23717549418606382 - Mean Reward 800.82 - Mean Length 166.66 - Mean Loss 1.872 - Mean Q Value 46.808 - Time Delta 26.846Time 2023-05-30T09:09:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_5760000.chkpt at step 5760000
Episode 40000 - Step 5760051 - Epsilon 0.23692469517789985 - Mean Reward 851.8 - Mean Length 181.55 - Mean Loss 1.854 - Mean Q Value 46.607 - Time Delta 35.131Time 2023-05-30T09:10:02




Episode 40000 - Step 5764168 - Epsilon 0.23668096585560702 - Mean Reward 872.92 - Mean Length 193.43 - Mean Loss 1.841 - Mean Q Value 46.463 - Time Delta 31.489Time 2023-05-30T09:10:33




Episode 40000 - Step 5768292 - Epsilon 0.23643707349740367 - Mean Reward 869.01 - Mean Length 194.01 - Mean Loss 1.815 - Mean Q Value 46.542 - Time Delta 32.529Time 2023-05-30T09:11:06




MarioNet saved to ./checkpoints_pytorch/ mario_net_5770000.chkpt at step 5770000
Episode 40000 - Step 5772952 - Epsilon 0.2361617846592136 - Mean Reward 879.16 - Mean Length 205.58 - Mean Loss 1.782 - Mean Q Value 46.727 - Time Delta 38.315Time 2023-05-30T09:11:44




Episode 40000 - Step 5776909 - Epsilon 0.23592827710223638 - Mean Reward 920.87 - Mean Length 210.9 - Mean Loss 1.746 - Mean Q Value 46.607 - Time Delta 34.069Time 2023-05-30T09:12:18




MarioNet saved to ./checkpoints_pytorch/ mario_net_5780000.chkpt at step 5780000
Episode 40000 - Step 5780655 - Epsilon 0.23570743366917216 - Mean Reward 913.84 - Mean Length 206.04 - Mean Loss 1.716 - Mean Q Value 46.677 - Time Delta 31.219Time 2023-05-30T09:12:50




Episode 40000 - Step 5784022 - Epsilon 0.23550911039308164 - Mean Reward 918.35 - Mean Length 198.54 - Mean Loss 1.679 - Mean Q Value 46.654 - Time Delta 28.074Time 2023-05-30T09:13:18




Episode 40000 - Step 5788045 - Epsilon 0.23527236614839656 - Mean Reward 933.14 - Mean Length 197.53 - Mean Loss 1.662 - Mean Q Value 46.485 - Time Delta 29.078Time 2023-05-30T09:13:47




MarioNet saved to ./checkpoints_pytorch/ mario_net_5790000.chkpt at step 5790000
Episode 40000 - Step 5791915 - Epsilon 0.23504485018427204 - Mean Reward 957.83 - Mean Length 189.63 - Mean Loss 1.65 - Mean Q Value 46.262 - Time Delta 29.937Time 2023-05-30T09:14:17




Episode 40000 - Step 5795476 - Epsilon 0.23483569459441764 - Mean Reward 947.44 - Mean Length 185.67 - Mean Loss 1.642 - Mean Q Value 46.366 - Time Delta 28.704Time 2023-05-30T09:14:45




Episode 40000 - Step 5797825 - Epsilon 0.23469782780061396 - Mean Reward 880.26 - Mean Length 171.7 - Mean Loss 1.634 - Mean Q Value 46.572 - Time Delta 18.112Time 2023-05-30T09:15:03




MarioNet saved to ./checkpoints_pytorch/ mario_net_5800000.chkpt at step 5800000
Episode 40000 - Step 5801544 - Epsilon 0.23447971887701985 - Mean Reward 885.83 - Mean Length 175.22 - Mean Loss 1.63 - Mean Q Value 46.961 - Time Delta 28.169Time 2023-05-30T09:15:32




Episode 40000 - Step 5806029 - Epsilon 0.23421695579842683 - Mean Reward 917.04 - Mean Length 179.84 - Mean Loss 1.65 - Mean Q Value 47.413 - Time Delta 33.583Time 2023-05-30T09:16:05




Episode 40000 - Step 5809516 - Epsilon 0.23401286611218694 - Mean Reward 904.0 - Mean Length 176.01 - Mean Loss 1.649 - Mean Q Value 48.007 - Time Delta 27.915Time 2023-05-30T09:16:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_5810000.chkpt at step 5810000
Episode 40000 - Step 5813245 - Epsilon 0.23379480924801477 - Mean Reward 911.36 - Mean Length 177.69 - Mean Loss 1.698 - Mean Q Value 48.738 - Time Delta 27.724Time 2023-05-30T09:17:01




Episode 40000 - Step 5817546 - Epsilon 0.23354355645192082 - Mean Reward 973.39 - Mean Length 197.21 - Mean Loss 1.728 - Mean Q Value 49.509 - Time Delta 31.635Time 2023-05-30T09:17:32




MarioNet saved to ./checkpoints_pytorch/ mario_net_5820000.chkpt at step 5820000
Episode 40000 - Step 5820015 - Epsilon 0.233399446154283 - Mean Reward 917.09 - Mean Length 184.71 - Mean Loss 1.735 - Mean Q Value 50.151 - Time Delta 18.835Time 2023-05-30T09:17:51




Episode 40000 - Step 5822939 - Epsilon 0.2332288934823976 - Mean Reward 835.24 - Mean Length 169.1 - Mean Loss 1.792 - Mean Q Value 50.692 - Time Delta 20.911Time 2023-05-30T09:18:12




Episode 40000 - Step 5826846 - Epsilon 0.23300119835090666 - Mean Reward 849.5 - Mean Length 173.3 - Mean Loss 1.8 - Mean Q Value 50.978 - Time Delta 28.046Time 2023-05-30T09:18:40




MarioNet saved to ./checkpoints_pytorch/ mario_net_5830000.chkpt at step 5830000
Episode 40000 - Step 5830306 - Epsilon 0.23279973943266585 - Mean Reward 823.97 - Mean Length 170.61 - Mean Loss 1.776 - Mean Q Value 51.085 - Time Delta 24.026Time 2023-05-30T09:19:04




Episode 40000 - Step 5834010 - Epsilon 0.23258426662629222 - Mean Reward 801.62 - Mean Length 164.64 - Mean Loss 1.8 - Mean Q Value 50.886 - Time Delta 26.233Time 2023-05-30T09:19:30




Episode 40000 - Step 5837818 - Epsilon 0.23236295173936986 - Mean Reward 835.69 - Mean Length 178.03 - Mean Loss 1.823 - Mean Q Value 50.598 - Time Delta 27.08Time 2023-05-30T09:19:58




MarioNet saved to ./checkpoints_pytorch/ mario_net_5840000.chkpt at step 5840000
Episode 40000 - Step 5843235 - Epsilon 0.23204848715251936 - Mean Reward 870.7 - Mean Length 202.96 - Mean Loss 1.831 - Mean Q Value 50.112 - Time Delta 38.201Time 2023-05-30T09:20:36




Episode 40000 - Step 5847008 - Epsilon 0.23182971058635343 - Mean Reward 848.14 - Mean Length 201.62 - Mean Loss 1.848 - Mean Q Value 49.293 - Time Delta 26.761Time 2023-05-30T09:21:03




MarioNet saved to ./checkpoints_pytorch/ mario_net_5850000.chkpt at step 5850000
Episode 40000 - Step 5851805 - Epsilon 0.23155185541337106 - Mean Reward 838.42 - Mean Length 214.99 - Mean Loss 1.891 - Mean Q Value 48.381 - Time Delta 35.198Time 2023-05-30T09:21:38




Episode 40000 - Step 5855516 - Epsilon 0.23133713277250634 - Mean Reward 854.13 - Mean Length 215.06 - Mean Loss 1.881 - Mean Q Value 47.547 - Time Delta 27.771Time 2023-05-30T09:22:05




Episode 40000 - Step 5859742 - Epsilon 0.23109285412408098 - Mean Reward 838.98 - Mean Length 219.24 - Mean Loss 1.884 - Mean Q Value 46.71 - Time Delta 32.053Time 2023-05-30T09:22:38




MarioNet saved to ./checkpoints_pytorch/ mario_net_5860000.chkpt at step 5860000
Episode 40000 - Step 5863735 - Epsilon 0.23086228075767679 - Mean Reward 821.73 - Mean Length 205.0 - Mean Loss 1.856 - Mean Q Value 45.775 - Time Delta 28.556Time 2023-05-30T09:23:06




Episode 40000 - Step 5867422 - Epsilon 0.23064958146661604 - Mean Reward 813.54 - Mean Length 204.14 - Mean Loss 1.857 - Mean Q Value 44.967 - Time Delta 26.417Time 2023-05-30T09:23:33




MarioNet saved to ./checkpoints_pytorch/ mario_net_5870000.chkpt at step 5870000
Episode 40000 - Step 5872439 - Epsilon 0.2303604705394738 - Mean Reward 832.37 - Mean Length 206.34 - Mean Loss 1.816 - Mean Q Value 44.044 - Time Delta 49.364Time 2023-05-30T09:24:22




Episode 40000 - Step 5876494 - Epsilon 0.23012706091274981 - Mean Reward 857.21 - Mean Length 209.78 - Mean Loss 1.788 - Mean Q Value 43.054 - Time Delta 29.595Time 2023-05-30T09:24:51




Episode 40000 - Step 5879696 - Epsilon 0.229942917890524 - Mean Reward 868.58 - Mean Length 199.54 - Mean Loss 1.741 - Mean Q Value 42.421 - Time Delta 23.468Time 2023-05-30T09:25:15




MarioNet saved to ./checkpoints_pytorch/ mario_net_5880000.chkpt at step 5880000
Episode 40000 - Step 5882895 - Epsilon 0.22975909453509938 - Mean Reward 859.11 - Mean Length 191.6 - Mean Loss 1.672 - Mean Q Value 42.2 - Time Delta 23.248Time 2023-05-30T09:25:38




Episode 40000 - Step 5886456 - Epsilon 0.2295546424958206 - Mean Reward 862.84 - Mean Length 190.34 - Mean Loss 1.611 - Mean Q Value 42.411 - Time Delta 26.825Time 2023-05-30T09:26:05




Episode 40000 - Step 5889647 - Epsilon 0.22937158828231966 - Mean Reward 856.08 - Mean Length 172.08 - Mean Loss 1.571 - Mean Q Value 42.96 - Time Delta 23.996Time 2023-05-30T09:26:29




MarioNet saved to ./checkpoints_pytorch/ mario_net_5890000.chkpt at step 5890000
Episode 40000 - Step 5892880 - Epsilon 0.22918627357329546 - Mean Reward 785.08 - Mean Length 163.86 - Mean Loss 1.552 - Mean Q Value 43.656 - Time Delta 24.179Time 2023-05-30T09:26:53




Episode 40000 - Step 5896693 - Epsilon 0.22896790582674836 - Mean Reward 807.98 - Mean Length 169.97 - Mean Loss 1.539 - Mean Q Value 44.184 - Time Delta 75.863Time 2023-05-30T09:28:09




MarioNet saved to ./checkpoints_pytorch/ mario_net_5900000.chkpt at step 5900000
Episode 40000 - Step 5901493 - Epsilon 0.22869330909638294 - Mean Reward 831.12 - Mean Length 185.98 - Mean Loss 1.537 - Mean Q Value 44.668 - Time Delta 98.779Time 2023-05-30T09:29:48




Episode 40000 - Step 5904661 - Epsilon 0.22851225567953876 - Mean Reward 810.11 - Mean Length 182.05 - Mean Loss 1.551 - Mean Q Value 44.972 - Time Delta 22.643Time 2023-05-30T09:30:10




MarioNet saved to ./checkpoints_pytorch/ mario_net_5910000.chkpt at step 5910000
Episode 40000 - Step 5910957 - Epsilon 0.22815286026251777 - Mean Reward 812.56 - Mean Length 213.1 - Mean Loss 1.549 - Mean Q Value 44.723 - Time Delta 45.946Time 2023-05-30T09:30:56




Episode 40000 - Step 5914116 - Epsilon 0.22797274764991582 - Mean Reward 838.21 - Mean Length 212.36 - Mean Loss 1.548 - Mean Q Value 44.594 - Time Delta 23.471Time 2023-05-30T09:31:20




Episode 40000 - Step 5918023 - Epsilon 0.22775018395303787 - Mean Reward 800.52 - Mean Length 213.3 - Mean Loss 1.557 - Mean Q Value 44.376 - Time Delta 29.627Time 2023-05-30T09:31:50




MarioNet saved to ./checkpoints_pytorch/ mario_net_5920000.chkpt at step 5920000
Episode 40000 - Step 5923196 - Epsilon 0.22745583636434033 - Mean Reward 811.78 - Mean Length 217.03 - Mean Loss 1.578 - Mean Q Value 43.864 - Time Delta 40.68Time 2023-05-30T09:32:30




Episode 40000 - Step 5925825 - Epsilon 0.2273063901143425 - Mean Reward 806.45 - Mean Length 211.64 - Mean Loss 1.59 - Mean Q Value 43.137 - Time Delta 19.413Time 2023-05-30T09:32:50




Episode 40000 - Step 5929099 - Epsilon 0.22712041593107796 - Mean Reward 790.6 - Mean Length 181.42 - Mean Loss 1.573 - Mean Q Value 42.835 - Time Delta 25.383Time 2023-05-30T09:33:15




MarioNet saved to ./checkpoints_pytorch/ mario_net_5930000.chkpt at step 5930000
Episode 40000 - Step 5933697 - Epsilon 0.22685949097566235 - Mean Reward 798.03 - Mean Length 195.81 - Mean Loss 1.598 - Mean Q Value 42.254 - Time Delta 35.748Time 2023-05-30T09:33:51




Episode 40000 - Step 5936714 - Epsilon 0.22668844669637217 - Mean Reward 794.39 - Mean Length 186.91 - Mean Loss 1.604 - Mean Q Value 41.547 - Time Delta 24.507Time 2023-05-30T09:34:15




MarioNet saved to ./checkpoints_pytorch/ mario_net_5940000.chkpt at step 5940000
Episode 40000 - Step 5941546 - Epsilon 0.22641477235109986 - Mean Reward 756.23 - Mean Length 183.5 - Mean Loss 1.575 - Mean Q Value 41.021 - Time Delta 40.086Time 2023-05-30T09:34:55




Episode 40000 - Step 5947409 - Epsilon 0.22608314795562806 - Mean Reward 769.12 - Mean Length 215.84 - Mean Loss 1.533 - Mean Q Value 40.422 - Time Delta 41.945Time 2023-05-30T09:35:37




Episode 40000 - Step 5949950 - Epsilon 0.22593957422535715 - Mean Reward 740.92 - Mean Length 208.51 - Mean Loss 1.526 - Mean Q Value 40.067 - Time Delta 19.142Time 2023-05-30T09:35:56




MarioNet saved to ./checkpoints_pytorch/ mario_net_5950000.chkpt at step 5950000
Episode 40000 - Step 5954917 - Epsilon 0.22565918784489766 - Mean Reward 754.91 - Mean Length 212.2 - Mean Loss 1.494 - Mean Q Value 39.723 - Time Delta 38.805Time 2023-05-30T09:36:35




Episode 40000 - Step 5957925 - Epsilon 0.22548955590401465 - Mean Reward 772.62 - Mean Length 212.11 - Mean Loss 1.484 - Mean Q Value 39.319 - Time Delta 23.88Time 2023-05-30T09:36:59




MarioNet saved to ./checkpoints_pytorch/ mario_net_5960000.chkpt at step 5960000
Episode 40000 - Step 5960682 - Epsilon 0.2253341907570302 - Mean Reward 758.74 - Mean Length 191.36 - Mean Loss 1.49 - Mean Q Value 39.222 - Time Delta 21.739Time 2023-05-30T09:37:21




Episode 40000 - Step 5964676 - Epsilon 0.22510930683140082 - Mean Reward 784.48 - Mean Length 172.67 - Mean Loss 1.5 - Mean Q Value 39.62 - Time Delta 31.276Time 2023-05-30T09:37:52




Episode 40000 - Step 5967789 - Epsilon 0.22493418364509304 - Mean Reward 809.72 - Mean Length 178.39 - Mean Loss 1.5 - Mean Q Value 39.635 - Time Delta 24.383Time 2023-05-30T09:38:17




MarioNet saved to ./checkpoints_pytorch/ mario_net_5970000.chkpt at step 5970000
Episode 40000 - Step 5970916 - Epsilon 0.22475841003949243 - Mean Reward 793.55 - Mean Length 159.99 - Mean Loss 1.479 - Mean Q Value 39.85 - Time Delta 24.714Time 2023-05-30T09:38:41




Episode 40000 - Step 5974438 - Epsilon 0.22456059733452222 - Mean Reward 830.22 - Mean Length 165.13 - Mean Loss 1.462 - Mean Q Value 40.678 - Time Delta 27.376Time 2023-05-30T09:39:09




Episode 40000 - Step 5978174 - Epsilon 0.22435095562829452 - Mean Reward 872.51 - Mean Length 174.92 - Mean Loss 1.443 - Mean Q Value 41.376 - Time Delta 28.96Time 2023-05-30T09:39:38




MarioNet saved to ./checkpoints_pytorch/ mario_net_5980000.chkpt at step 5980000
Episode 40000 - Step 5981988 - Epsilon 0.22413713891866263 - Mean Reward 883.04 - Mean Length 173.12 - Mean Loss 1.435 - Mean Q Value 42.188 - Time Delta 30.273Time 2023-05-30T09:40:08




Episode 40000 - Step 5985656 - Epsilon 0.22393169934500642 - Mean Reward 932.54 - Mean Length 178.67 - Mean Loss 1.475 - Mean Q Value 43.596 - Time Delta 28.714Time 2023-05-30T09:40:37




Episode 40000 - Step 5989338 - Epsilon 0.22372566503181715 - Mean Reward 965.17 - Mean Length 184.22 - Mean Loss 1.5 - Mean Q Value 45.426 - Time Delta 27.882Time 2023-05-30T09:41:04




MarioNet saved to ./checkpoints_pytorch/ mario_net_5990000.chkpt at step 5990000
Episode 40000 - Step 5993270 - Epsilon 0.22350585073201143 - Mean Reward 966.9 - Mean Length 188.32 - Mean Loss 1.55 - Mean Q Value 46.873 - Time Delta 29.116Time 2023-05-30T09:41:34




Episode 40000 - Step 5997048 - Epsilon 0.22329484909080177 - Mean Reward 956.81 - Mean Length 188.74 - Mean Loss 1.583 - Mean Q Value 48.39 - Time Delta 28.097Time 2023-05-30T09:42:02




MarioNet saved to ./checkpoints_pytorch/ mario_net_6000000.chkpt at step 6000000
Episode 40000 - Step 6000505 - Epsilon 0.22310194986192172 - Mean Reward 961.59 - Mean Length 185.17 - Mean Loss 1.603 - Mean Q Value 49.668 - Time Delta 26.89Time 2023-05-30T09:42:29




Episode 40000 - Step 6003658 - Epsilon 0.2229261590206119 - Mean Reward 932.58 - Mean Length 180.02 - Mean Loss 1.638 - Mean Q Value 50.624 - Time Delta 24.202Time 2023-05-30T09:42:53




Episode 40000 - Step 6008203 - Epsilon 0.22267300299225004 - Mean Reward 953.26 - Mean Length 188.65 - Mean Loss 1.664 - Mean Q Value 51.133 - Time Delta 33.986Time 2023-05-30T09:43:27




MarioNet saved to ./checkpoints_pytorch/ mario_net_6010000.chkpt at step 6010000
Episode 40000 - Step 6011848 - Epsilon 0.22247018461604978 - Mean Reward 951.93 - Mean Length 185.78 - Mean Loss 1.675 - Mean Q Value 51.545 - Time Delta 26.978Time 2023-05-30T09:43:54




Episode 40000 - Step 6015342 - Epsilon 0.2222759417334098 - Mean Reward 967.91 - Mean Length 182.94 - Mean Loss 1.712 - Mean Q Value 51.856 - Time Delta 25.887Time 2023-05-30T09:44:20




Episode 40000 - Step 6019523 - Epsilon 0.2220437291574933 - Mean Reward 983.95 - Mean Length 190.18 - Mean Loss 1.741 - Mean Q Value 52.102 - Time Delta 30.422Time 2023-05-30T09:44:50




MarioNet saved to ./checkpoints_pytorch/ mario_net_6020000.chkpt at step 6020000
Episode 40000 - Step 6023794 - Epsilon 0.22180676846593517 - Mean Reward 1028.93 - Mean Length 201.36 - Mean Loss 1.739 - Mean Q Value 52.172 - Time Delta 30.921Time 2023-05-30T09:45:21




KeyboardInterrupt: 

<Figure size 640x480 with 0 Axes>

In [None]:
torch.cuda.memory_stats()

In [None]:
torch.cuda.memory_summary()

# Testing the agent

#