In [1]:
import glob
import os
import warnings

import numpy as np
import tensorflow as tf
from keras.callbacks import ReduceLROnPlateau
from keras.layers import Dense, Flatten, Convolution2D, Input
from keras.models import Model
from keras.optimizers import Adam
from pommerman.agents import SimpleAgent, BaseAgent
from pommerman.configs import ffa_v0_env
from pommerman.constants import BOARD_SIZE
from pommerman.envs.v0 import Pomme
from rl.agents.dqn import DQNAgent
from rl.callbacks import FileLogger, ModelIntervalCheckpoint, Callback
from rl.core import Env, Processor
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# --- Run configuration -------------------------------------------------------
# Total number of environment steps to train for.
number_of_training_steps = 5000000
# Interval (in steps) shared by the file logger and the checkpoint callback.
log_interval = 1000
# Destination of the keras-rl FileLogger output.
file_log_path = './dqn/rl_logs/dense_8_2/log.txt'
# Directory receiving the TensorBoard event files.
tensorboard_path = './dqn/logs/dense_8_2/'
# Checkpoint file name template; `{step}` is a placeholder for the step count.
# The weights are stored as HDF5, so use the conventional `.h5` extension
# (the previous `.h4` was a typo).
model_path = './dqn/model/dense_8_2/model{step}.h5'

In [3]:
# Make sure the directories that will hold the log file and the checkpoints exist.
# `exist_ok=True` replaces the racy "check, then create" pattern, and paths without
# a directory component are skipped because `os.makedirs('')` would raise.
for output_path in (file_log_path, model_path):
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

In [9]:
class TensorforceAgent(BaseAgent):
    """Agent stub whose `act` never chooses an action itself."""

    def act(self, obs, action_space):
        """Ignore `obs` and `action_space` and return None."""
        return None


class TensorboardLogger(Callback):
    """Logging in tensorboard without tensorflow ops.

    Buffers the observation, reward, action and metrics of every step, keyed by
    episode, and writes one set of summary scalars per finished episode through
    a `tf.summary.FileWriter`.
    """

    def __init__(self, log_dir):
        """Create the per-episode buffers and a summary writer logging to `log_dir`."""
        super().__init__()
        # Some algorithms compute multiple episodes at once since they are multi-threaded.
        # We therefore use a dictionary that is indexed by the episode to separate episodes
        # from each other.
        self.observations = {}
        self.rewards = {}
        self.actions = {}
        self.metrics = {}
        # Number of steps seen so far, across all episodes.
        self.step = 0
        self.writer = tf.summary.FileWriter(log_dir)

    def log_scalar(self, tag, value, step):
        """Log a scalar variable.

        Parameters
        ----------
        tag : basestring
            Name of the scalar
        value
            Scalar value to record
        step : int
            training iteration
        """
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
        self.writer.add_summary(summary, step)

    def on_train_begin(self, logs):
        """Remember the metric names of the model being trained."""
        self.metrics_names = self.model.metrics_names

    def on_train_end(self, logs=None):
        """Flush buffered summaries so the last episodes are written to disk."""
        self.writer.flush()

    def on_episode_begin(self, episode, logs):
        """Start empty buffers for `episode`."""
        self.observations[episode] = []
        self.rewards[episode] = []
        self.actions[episode] = []
        self.metrics[episode] = []

    def on_episode_end(self, episode, logs):
        """Write the summary scalars of `episode` and release its buffers."""
        # Pop first: the buffers are freed even if writing the summaries fails.
        observations = self.observations.pop(episode)
        rewards = self.rewards.pop(episode)
        actions = self.actions.pop(episode)
        metrics = np.array(self.metrics.pop(episode))

        episode_steps = len(observations)
        if episode_steps == 0:
            # Nothing was recorded; np.min/np.max would raise on empty input.
            return

        variables = {
            'step': self.step,
            'nb_steps': self.params['nb_steps'],
            'episode_steps': episode_steps,
            'episode_reward': np.sum(rewards),
            'reward_mean': np.mean(rewards),
            'reward_min': np.min(rewards),
            'reward_max': np.max(rewards),
            'action_mean': np.mean(actions),
            'action_min': np.min(actions),
            'action_max': np.max(actions),
            'obs_mean': np.mean(observations),
            'obs_min': np.min(observations),
            'obs_max': np.max(observations),
        }

        # Format all metrics. Warnings are turned into errors so that a metric
        # column for which nanmean warns is reported as -1.
        with warnings.catch_warnings():
            warnings.filterwarnings('error')
            for idx, name in enumerate(self.metrics_names):
                try:
                    value = np.nanmean(metrics[:, idx])
                except Warning:
                    value = -1
                variables[name] = value

        for key, value in variables.items():
            self.log_scalar(key, value, episode + 1)

    def on_step_end(self, step, logs):
        """Append the data of one step to the buffers of its episode."""
        episode = logs['episode']
        self.observations[episode].append(logs['observation'])
        self.rewards[episode].append(logs['reward'])
        self.actions[episode].append(logs['action'])
        self.metrics[episode].append(logs['metrics'])
        self.step += 1

In [10]:
# Build the free-for-all environment and fix the seeds of numpy and the environment.
config = ffa_v0_env()
env = Pomme(**config["env_kwargs"])
np.random.seed(0)
env.seed(0)

# Slots 0-2 are played by SimpleAgent opponents.
agents = [SimpleAgent(config["agent"](opponent_id, config["game_type"]))
          for opponent_id in range(3)]

# The agent being trained takes the next free slot.
agent_id = len(agents)
agents.append(TensorforceAgent(config["agent"](agent_id, config["game_type"])))

env.set_agents(agents)
env.set_training_agent(agents[-1].agent_id)
env.set_init_game_state(None)

# Size of the discrete action space; used as the width of the model output.
nb_actions = env.action_space.n


def create_model(actions, input_shape=(2369,)):
    """Build and compile a small fully connected network.

    # Arguments
        actions (int): Number of units of the linear output layer.
        input_shape (tuple): Shape of a single input sample. The default 2369
            equals 14 * 13 * 13 + 3, i.e. fourteen 13x13 planes plus three scalars.
    # Returns
        A compiled Keras `Model`: input -> Dense(8, relu) -> Dense(8, relu) -> Dense(actions).
    """
    observation = Input(input_shape)

    hidden = observation
    for units in (8, 8):
        hidden = Dense(units, activation='relu')(hidden)

    output = Dense(actions)(hidden)
    network = Model(inputs=observation, outputs=output)
    # NOTE(review): these compile settings are presumably superseded once the model is
    # handed to DQNAgent and the agent is compiled - confirm.
    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return network


# Next, we build a very simple model without any dueling architecture.
# If the dueling network is enabled in the DQN agent, the agent builds a dueling network
# based on this model automatically.
# Alternatively, you can build a dueling network yourself and turn the option off in the agent.
model = create_model(nb_actions)
# `summary()` prints the table itself and returns None, so it must not be wrapped in
# print() (that printed a stray "None" below the table).
model.summary()

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 2369)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 8)                 18960     
_________________________________________________________________
dense_6 (Dense)              (None, 8)                 72        
_________________________________________________________________
dense_7 (Dense)              (None, 6)                 54        
Total params: 19,086
Trainable params: 19,086
Non-trainable params: 0
_________________________________________________________________
None


In [11]:
class EnvWrapper(Env):
    """Single-agent view of a multi-agent environment for keras-rl.

    The wrapped environment (`gym`) steps all agents at once. This wrapper asks it
    for the actions of the other agents, inserts the action of the training agent
    at index `gym.training_agent`, and returns only the training agent's featurized
    observation and reward.

    Implemented methods: `step`, `reset`, `render`, `close`, `seed`.
    `configure` is not supported.
    """
    reward_range = (-1, 1)
    action_space = None
    observation_space = None

    def __init__(self, gym, board_size):
        """Wrap `gym`.

        # Arguments
            gym: The environment to wrap; its spaces and reward range are mirrored.
            board_size (int): Side length of the square board used by `featurize`.
        """
        self.gym = gym
        self.action_space = gym.action_space
        self.observation_space = gym.observation_space
        self.reward_range = gym.reward_range
        self.board_size = board_size

    def step(self, action):
        """Run one timestep of the environment's dynamics.
        Accepts an action and returns a tuple (observation, reward, done, info).
        # Arguments
            action (object): The action chosen for the training agent.
        # Returns
            observation (object): Featurized observation of the training agent.
            reward (float) : Reward of the training agent for this step.
            done (boolean): Whether the episode has ended, in which case further step() calls will return undefined results.
            info (dict): Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        obs = self.gym.get_observations()
        # Actions of all other agents; the training agent's action is spliced in
        # at its own index so the list lines up with the agent order.
        all_actions = self.gym.act(obs)
        all_actions.insert(self.gym.training_agent, action)
        state, reward, terminal, info = self.gym.step(all_actions)
        agent_state = self.featurize(state[self.gym.training_agent])
        agent_reward = reward[self.gym.training_agent]
        return agent_state, agent_reward, terminal, info

    def reset(self):
        """
        Resets the state of the environment and returns an initial observation.
        # Returns
            observation (object): Featurized initial observation of the training agent.
        """
        obs = self.gym.reset()
        agent_obs = self.featurize(obs[self.gym.training_agent])
        return agent_obs

    def render(self, mode='human', close=False):
        """Renders the wrapped environment.
        # Arguments
            mode (str): The mode to render with.
            close (bool): Close all open renderings.
        """
        self.gym.render(mode=mode, close=close)

    def close(self):
        """Close the wrapped environment."""
        self.gym.close()

    def seed(self, seed=None):
        """Sets the seed for the wrapped env's random number generator(s).
        # Returns
            Whatever the wrapped environment's `seed` returns.
        """
        # The original `raise`d this value, which fails with a TypeError.
        return self.gym.seed(seed)

    def configure(self, *args, **kwargs):
        """Runtime configuration is not supported by this wrapper.
        # Raises
            NotImplementedError: always.
        """
        raise NotImplementedError()

    def featurize(self, obs):
        """Turn one agent observation into a flat feature vector.

        Reads the keys 'board', 'position', 'teammate', 'enemies',
        'bomb_blast_strength', 'bomb_life', 'ammo', 'blast_strength' and
        'can_kick' of `obs`.

        # Arguments
            obs (dict): Observation of a single agent.
        # Returns
            1-D numpy array of length 14 * board_size**2 + 3: fourteen flattened
            board planes followed by ammo, blast strength and the kick flag.
        """
        shape = (self.board_size, self.board_size, 1)

        def as_plane(source, key):
            # Board-shaped entry of the observation as a float32 plane.
            return source[key].reshape(shape).astype(np.float32)

        def mask_plane(board, item):
            # Plane holding 1 where the board equals `item` and 0 elsewhere.
            plane = np.zeros(shape)
            plane[board == item] = 1
            return plane

        board = as_plane(obs, 'board')

        # TODO: probably not needed Passage = 0
        rigid_map = mask_plane(board, 1)               # Rigid = 1
        wood_map = mask_plane(board, 2)                # Wood = 2
        bomb_map = mask_plane(board, 3)                # Bomb = 3
        flames_map = mask_plane(board, 4)              # Flames = 4
        fog_map = mask_plane(board, 5)                 # TODO: not used for first two stages Fog = 5
        extra_bomb_map = mask_plane(board, 6)          # ExtraBomb = 6
        incr_range_map = mask_plane(board, 7)          # IncrRange = 7
        kick_map = mask_plane(board, 8)                # Kick = 8
        skull_map = mask_plane(board, 9)               # Skull = 9

        position = obs["position"]
        my_position = np.zeros(shape)
        my_position[position[0], position[1], 0] = 1

        team_mates = mask_plane(board, obs["teammate"].value)  # TODO during documentation it should be an array

        # All enemies share a single plane.
        enemies = np.zeros(shape)
        for enemy in obs["enemies"]:
            enemies[board == enemy.value] = 1

        bomb_blast_strength = as_plane(obs, 'bomb_blast_strength')
        bomb_life = as_plane(obs, 'bomb_life')

        ammo = obs["ammo"]
        blast_strength = obs["blast_strength"]
        can_kick = int(obs["can_kick"])

        # The plane order is part of the model input layout; do not reorder.
        features = np.concatenate([my_position, enemies, team_mates, rigid_map,
                                   wood_map, bomb_map, flames_map,
                                   fog_map, extra_bomb_map, incr_range_map,
                                   kick_map, skull_map, bomb_blast_strength,
                                   bomb_life], axis=2).flatten()
        return np.append(features, [ammo, blast_strength, can_kick])

    def __del__(self):
        # `gym` is missing if __init__ did not get as far as assigning it.
        if getattr(self, 'gym', None) is not None:
            self.close()

    def __str__(self):
        return '<{} instance>'.format(type(self).__name__)


class CustomProcessor(Processor):
    """Processor that reshapes state batches and makes the step info numeric."""

    def process_state_batch(self, batch):
        """Remove axis 1 from a batch of states.
        # Arguments
            batch (list): List of states
        # Returns
            The batch with its size-1 axis 1 squeezed away
        """
        # presumably axis 1 is the window_length=1 axis added by the memory - confirm
        return np.squeeze(batch, axis=1)

    def process_info(self, info):
        """Replace `info['result']` by its `.value` and return the same dict.
        # Arguments
            info (dict): Info as obtained from the environment; modified in place
        # Returns
            The modified `info`
        """
        result = info['result']
        info['result'] = result.value
        return info

In [12]:
env_wrapper = EnvWrapper(env, BOARD_SIZE)
processor = CustomProcessor()

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
file_logger = FileLogger(file_log_path, interval=log_interval)
checkpoint = ModelIntervalCheckpoint(model_path, interval=log_interval)
tensorboard = TensorboardLogger(tensorboard_path)
# NOTE: reduce_rl is created but is not part of `callbacks`, so it is currently unused.
reduce_rl = ReduceLROnPlateau(monitor='loss', factor=0.9, patience=3)
callbacks = [file_logger, checkpoint, tensorboard]
# enable the dueling network
# you can specify the dueling_type to one of {'avg','max','naive'}
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=512,
               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, policy=policy,
               processor=processor, batch_size=512)
dqn.compile(Adam(lr=1e-4), metrics=['mae'])

# Resume from the most recently written checkpoint, if there is one.
# `model_path` is a template containing `{step}`, so testing the template itself with
# os.path.isfile() never matches a saved file; match the files written from it instead.
saved_checkpoints = glob.glob(model_path.replace('{step}', '*'))
if saved_checkpoints:
    dqn.load_weights(max(saved_checkpoints, key=os.path.getmtime))

In [13]:
# Train the agent; each episode is capped at the environment's own step limit.
history = dqn.fit(
    env_wrapper,
    nb_steps=number_of_training_steps,
    visualize=False,
    verbose=2,
    nb_max_episode_steps=env._max_steps,
    callbacks=callbacks,
)

Training for 5000000 steps ...
      33/5000000: episode: 1, duration: 0.518s, episode steps: 33, steps per second: 64, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.273 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: --, mean_absolute_error: --, mean_q: --
     125/5000000: episode: 2, duration: 1.386s, episode steps: 92, steps per second: 66, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.772 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: --, mean_absolute_error: --, mean_q: --
     185/5000000: episode: 3, duration: 0.926s, episode steps: 60, steps per second: 65, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.367 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: --, mean_absolute_error: --, mean_q: --
     212/5000000: episode: 4, duration: 0.514s, episode steps: 27, steps per second: 52, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mea

    1446/5000000: episode: 30, duration: 1.692s, episode steps: 35, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.029 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.000702, mean_absolute_error: 0.460401, mean_q: -0.538457
    1477/5000000: episode: 31, duration: 1.485s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.742 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.000697, mean_absolute_error: 0.460561, mean_q: -0.539018
    1592/5000000: episode: 32, duration: 5.345s, episode steps: 115, steps per second: 22, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.374 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.000663, mean_absolute_error: 0.460218, mean_q: -0.538551
    1755/5000000: episode: 33, duration: 7.828s, episode steps: 163, steps per second: 21, episode reward: -1.000, mean rew

    3145/5000000: episode: 59, duration: 1.495s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.484 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.000821, mean_absolute_error: 0.447495, mean_q: -0.521802
    3171/5000000: episode: 60, duration: 1.282s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.346 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.000813, mean_absolute_error: 0.448672, mean_q: -0.523429
    3200/5000000: episode: 61, duration: 1.394s, episode steps: 29, steps per second: 21, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.172 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.000787, mean_absolute_error: 0.448954, mean_q: -0.523448
    3225/5000000: episode: 62, duration: 1.266s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean rewar

    4938/5000000: episode: 88, duration: 1.587s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.875 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.000907, mean_absolute_error: 0.455043, mean_q: -0.529358
    4992/5000000: episode: 89, duration: 2.633s, episode steps: 54, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 3.074 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.000920, mean_absolute_error: 0.454852, mean_q: -0.529004
    5025/5000000: episode: 90, duration: 2.586s, episode steps: 33, steps per second: 13, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.515 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.000934, mean_absolute_error: 0.451426, mean_q: -0.524531
    5221/5000000: episode: 91, duration: 9.613s, episode steps: 196, steps per second: 20, episode reward: -1.000, mean rewa

    6371/5000000: episode: 116, duration: 5.701s, episode steps: 124, steps per second: 22, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.331 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.000850, mean_absolute_error: 0.435202, mean_q: -0.507041
    6435/5000000: episode: 117, duration: 3.125s, episode steps: 64, steps per second: 20, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.000880, mean_absolute_error: 0.435194, mean_q: -0.507706
    6461/5000000: episode: 118, duration: 1.305s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.808 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.000947, mean_absolute_error: 0.437327, mean_q: -0.509450
    6490/5000000: episode: 119, duration: 1.471s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean 

    8177/5000000: episode: 144, duration: 5.865s, episode steps: 119, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.605 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.000973, mean_absolute_error: 0.441767, mean_q: -0.511637
    8206/5000000: episode: 145, duration: 1.538s, episode steps: 29, steps per second: 19, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.966 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.000927, mean_absolute_error: 0.448398, mean_q: -0.520859
    8233/5000000: episode: 146, duration: 1.242s, episode steps: 27, steps per second: 22, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.778 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.000981, mean_absolute_error: 0.445639, mean_q: -0.517556
    8401/5000000: episode: 147, duration: 8.115s, episode steps: 168, steps per second: 21, episode reward: -1.000, mean

    9850/5000000: episode: 172, duration: 1.358s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.750 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.000914, mean_absolute_error: 0.453364, mean_q: -0.523407
    9925/5000000: episode: 173, duration: 3.568s, episode steps: 75, steps per second: 21, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.053 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.000887, mean_absolute_error: 0.457888, mean_q: -0.530148
    9952/5000000: episode: 174, duration: 1.376s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 1.926 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.000917, mean_absolute_error: 0.454347, mean_q: -0.524667
   10040/5000000: episode: 175, duration: 4.268s, episode steps: 88, steps per second: 21, episode reward: -1.000, mean r

   11652/5000000: episode: 200, duration: 2.613s, episode steps: 55, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.745 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.000875, mean_absolute_error: 0.455200, mean_q: -0.525131
   11796/5000000: episode: 201, duration: 6.833s, episode steps: 144, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.389 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.000946, mean_absolute_error: 0.455549, mean_q: -0.523503
   11821/5000000: episode: 202, duration: 1.155s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.120 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001207, mean_absolute_error: 0.456059, mean_q: -0.524495
   11846/5000000: episode: 203, duration: 1.275s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean 

   13090/5000000: episode: 228, duration: 1.231s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.000907, mean_absolute_error: 0.453395, mean_q: -0.523539
   13120/5000000: episode: 229, duration: 1.529s, episode steps: 30, steps per second: 20, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.467 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.000967, mean_absolute_error: 0.456030, mean_q: -0.525621
   13344/5000000: episode: 230, duration: 10.216s, episode steps: 224, steps per second: 22, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.504 [0.000, 5.000], mean observation: 0.059 [0.000, 24.000], loss: 0.000943, mean_absolute_error: 0.456563, mean_q: -0.526530
   13371/5000000: episode: 231, duration: 1.361s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean

   15285/5000000: episode: 256, duration: 1.267s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.370 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.000921, mean_absolute_error: 0.459001, mean_q: -0.531209
   15313/5000000: episode: 257, duration: 1.386s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.000958, mean_absolute_error: 0.459387, mean_q: -0.533569
   15340/5000000: episode: 258, duration: 1.374s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 3.037 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.000942, mean_absolute_error: 0.458302, mean_q: -0.531766
   15454/5000000: episode: 259, duration: 5.660s, episode steps: 114, steps per second: 20, episode reward: -1.000, mean 

   16851/5000000: episode: 284, duration: 1.525s, episode steps: 29, steps per second: 19, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.517 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.000836, mean_absolute_error: 0.460321, mean_q: -0.537039
   17183/5000000: episode: 285, duration: 15.761s, episode steps: 332, steps per second: 21, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.506 [0.000, 5.000], mean observation: 0.059 [0.000, 24.000], loss: 0.000904, mean_absolute_error: 0.457042, mean_q: -0.533359
   17529/5000000: episode: 286, duration: 17.341s, episode steps: 346, steps per second: 20, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.405 [0.000, 5.000], mean observation: 0.059 [0.000, 24.000], loss: 0.000936, mean_absolute_error: 0.454820, mean_q: -0.531499
   17558/5000000: episode: 287, duration: 1.379s, episode steps: 29, steps per second: 21, episode reward: -1.000, me

   19303/5000000: episode: 312, duration: 1.213s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.720 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.000999, mean_absolute_error: 0.460325, mean_q: -0.540024
   19476/5000000: episode: 313, duration: 8.450s, episode steps: 173, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.555 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.000981, mean_absolute_error: 0.458487, mean_q: -0.537053
   19503/5000000: episode: 314, duration: 1.280s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.259 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001030, mean_absolute_error: 0.457962, mean_q: -0.536972
   19567/5000000: episode: 315, duration: 3.022s, episode steps: 64, steps per second: 21, episode reward: -1.000, mean 

   21122/5000000: episode: 340, duration: 3.058s, episode steps: 62, steps per second: 20, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.694 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.000951, mean_absolute_error: 0.458621, mean_q: -0.536191
   21150/5000000: episode: 341, duration: 1.337s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001021, mean_absolute_error: 0.456705, mean_q: -0.535897
   21176/5000000: episode: 342, duration: 1.310s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.885 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.000971, mean_absolute_error: 0.455310, mean_q: -0.533461
   21279/5000000: episode: 343, duration: 4.813s, episode steps: 103, steps per second: 21, episode reward: -1.000, mean 

   22557/5000000: episode: 368, duration: 2.685s, episode steps: 55, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.618 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.000949, mean_absolute_error: 0.455267, mean_q: -0.534021
   22592/5000000: episode: 369, duration: 1.662s, episode steps: 35, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.743 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.000988, mean_absolute_error: 0.453768, mean_q: -0.532378
   22651/5000000: episode: 370, duration: 2.779s, episode steps: 59, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.814 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.000980, mean_absolute_error: 0.453760, mean_q: -0.532107
   22676/5000000: episode: 371, duration: 1.247s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean r

   24256/5000000: episode: 396, duration: 11.454s, episode steps: 259, steps per second: 23, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.687 [0.000, 5.000], mean observation: 0.054 [0.000, 24.000], loss: 0.000981, mean_absolute_error: 0.455522, mean_q: -0.535791
   24286/5000000: episode: 397, duration: 1.487s, episode steps: 30, steps per second: 20, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.267 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.000936, mean_absolute_error: 0.455433, mean_q: -0.535646
   24320/5000000: episode: 398, duration: 1.653s, episode steps: 34, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001015, mean_absolute_error: 0.457322, mean_q: -0.537548
   24415/5000000: episode: 399, duration: 4.799s, episode steps: 95, steps per second: 20, episode reward: -1.000, mean

   26062/5000000: episode: 424, duration: 19.577s, episode steps: 192, steps per second: 10, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.521 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001032, mean_absolute_error: 0.455163, mean_q: -0.534451
   26088/5000000: episode: 425, duration: 1.276s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.654 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.000913, mean_absolute_error: 0.455112, mean_q: -0.534227
   26117/5000000: episode: 426, duration: 1.331s, episode steps: 29, steps per second: 22, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.517 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001249, mean_absolute_error: 0.456675, mean_q: -0.536165
   26143/5000000: episode: 427, duration: 1.206s, episode steps: 26, steps per second: 22, episode reward: -1.000, mean

   27464/5000000: episode: 452, duration: 1.986s, episode steps: 40, steps per second: 20, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.650 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.000994, mean_absolute_error: 0.453726, mean_q: -0.531899
   27618/5000000: episode: 453, duration: 7.422s, episode steps: 154, steps per second: 21, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.597 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001046, mean_absolute_error: 0.453584, mean_q: -0.530462
   27738/5000000: episode: 454, duration: 5.607s, episode steps: 120, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.283 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001019, mean_absolute_error: 0.454685, mean_q: -0.531094
   27769/5000000: episode: 455, duration: 1.517s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean

   29343/5000000: episode: 480, duration: 2.893s, episode steps: 60, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.767 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001173, mean_absolute_error: 0.456642, mean_q: -0.532137
   29375/5000000: episode: 481, duration: 1.586s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.188 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001114, mean_absolute_error: 0.455200, mean_q: -0.529469
   29401/5000000: episode: 482, duration: 1.262s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.192 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001318, mean_absolute_error: 0.455604, mean_q: -0.528098
   29428/5000000: episode: 483, duration: 1.410s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean r

   30894/5000000: episode: 508, duration: 2.501s, episode steps: 51, steps per second: 20, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.863 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001309, mean_absolute_error: 0.457760, mean_q: -0.534764
   30919/5000000: episode: 509, duration: 1.307s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001392, mean_absolute_error: 0.457911, mean_q: -0.531402
   30946/5000000: episode: 510, duration: 1.187s, episode steps: 27, steps per second: 23, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 3.074 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001076, mean_absolute_error: 0.458695, mean_q: -0.531936
   31149/5000000: episode: 511, duration: 10.366s, episode steps: 203, steps per second: 20, episode reward: -1.000, mean

   32567/5000000: episode: 536, duration: 1.357s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.852 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001097, mean_absolute_error: 0.453378, mean_q: -0.528635
   32594/5000000: episode: 537, duration: 1.343s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 1.741 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001489, mean_absolute_error: 0.455410, mean_q: -0.530110
   32628/5000000: episode: 538, duration: 1.644s, episode steps: 34, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.382 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001061, mean_absolute_error: 0.454890, mean_q: -0.530867
   32653/5000000: episode: 539, duration: 1.223s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean r

   33831/5000000: episode: 564, duration: 1.347s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.077 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001133, mean_absolute_error: 0.452876, mean_q: -0.530359
   33898/5000000: episode: 565, duration: 3.204s, episode steps: 67, steps per second: 21, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.657 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001180, mean_absolute_error: 0.453027, mean_q: -0.528701
   33929/5000000: episode: 566, duration: 1.487s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.645 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001235, mean_absolute_error: 0.455433, mean_q: -0.531772
   33961/5000000: episode: 567, duration: 1.453s, episode steps: 32, steps per second: 22, episode reward: -1.000, mean r

   35232/5000000: episode: 592, duration: 3.829s, episode steps: 78, steps per second: 20, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001095, mean_absolute_error: 0.457279, mean_q: -0.534461
   35258/5000000: episode: 593, duration: 1.256s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.769 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001060, mean_absolute_error: 0.455476, mean_q: -0.532952
   35400/5000000: episode: 594, duration: 6.720s, episode steps: 142, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.232 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001083, mean_absolute_error: 0.456210, mean_q: -0.534644
   35483/5000000: episode: 595, duration: 4.054s, episode steps: 83, steps per second: 20, episode reward: -1.000, mean 

   37056/5000000: episode: 620, duration: 4.058s, episode steps: 84, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001051, mean_absolute_error: 0.463433, mean_q: -0.543239
   37108/5000000: episode: 621, duration: 2.513s, episode steps: 52, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.462 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001098, mean_absolute_error: 0.461899, mean_q: -0.540220
   37465/5000000: episode: 622, duration: 17.731s, episode steps: 357, steps per second: 20, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.515 [0.000, 5.000], mean observation: 0.057 [0.000, 24.000], loss: 0.001118, mean_absolute_error: 0.461136, mean_q: -0.539417
   37494/5000000: episode: 623, duration: 1.457s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean

   39102/5000000: episode: 648, duration: 2.839s, episode steps: 59, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.186 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001075, mean_absolute_error: 0.462247, mean_q: -0.543435
   39130/5000000: episode: 649, duration: 1.383s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.607 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001102, mean_absolute_error: 0.465781, mean_q: -0.545578
   39156/5000000: episode: 650, duration: 1.296s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.038 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001044, mean_absolute_error: 0.462382, mean_q: -0.543181
   39210/5000000: episode: 651, duration: 2.650s, episode steps: 54, steps per second: 20, episode reward: -1.000, mean r

   40629/5000000: episode: 676, duration: 1.772s, episode steps: 40, steps per second: 23, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.275 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001045, mean_absolute_error: 0.463652, mean_q: -0.545326
   40656/5000000: episode: 677, duration: 1.283s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001074, mean_absolute_error: 0.462389, mean_q: -0.543075
   40725/5000000: episode: 678, duration: 3.595s, episode steps: 69, steps per second: 19, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.130 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001076, mean_absolute_error: 0.464802, mean_q: -0.546580
   40823/5000000: episode: 679, duration: 4.339s, episode steps: 98, steps per second: 23, episode reward: -1.000, mean r

   42131/5000000: episode: 704, duration: 1.996s, episode steps: 44, steps per second: 22, episode reward: -1.000, mean reward: -0.023 [-1.000, 0.000], mean action: 2.045 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001021, mean_absolute_error: 0.464261, mean_q: -0.545072
   42219/5000000: episode: 705, duration: 4.099s, episode steps: 88, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.455 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001034, mean_absolute_error: 0.462629, mean_q: -0.544225
   42324/5000000: episode: 706, duration: 5.010s, episode steps: 105, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.362 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001087, mean_absolute_error: 0.464488, mean_q: -0.545929
   42350/5000000: episode: 707, duration: 1.237s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean 

   43349/5000000: episode: 732, duration: 6.793s, episode steps: 139, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.525 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001074, mean_absolute_error: 0.463510, mean_q: -0.545428
   43535/5000000: episode: 733, duration: 8.918s, episode steps: 186, steps per second: 21, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.263 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001066, mean_absolute_error: 0.464801, mean_q: -0.547026
   43575/5000000: episode: 734, duration: 2.026s, episode steps: 40, steps per second: 20, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.575 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001027, mean_absolute_error: 0.463264, mean_q: -0.544544
   43664/5000000: episode: 735, duration: 4.632s, episode steps: 89, steps per second: 19, episode reward: -1.000, mean

   46004/5000000: episode: 760, duration: 1.202s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001184, mean_absolute_error: 0.461616, mean_q: -0.542029
   46029/5000000: episode: 761, duration: 1.251s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.760 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001193, mean_absolute_error: 0.462231, mean_q: -0.543983
   46055/5000000: episode: 762, duration: 1.293s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.115 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001124, mean_absolute_error: 0.460628, mean_q: -0.541967
   46130/5000000: episode: 763, duration: 3.979s, episode steps: 75, steps per second: 19, episode reward: -1.000, mean r

   47695/5000000: episode: 788, duration: 1.735s, episode steps: 36, steps per second: 21, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001139, mean_absolute_error: 0.463673, mean_q: -0.544228
   47761/5000000: episode: 789, duration: 3.282s, episode steps: 66, steps per second: 20, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.773 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001197, mean_absolute_error: 0.461090, mean_q: -0.539357
   47852/5000000: episode: 790, duration: 4.218s, episode steps: 91, steps per second: 22, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.363 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001220, mean_absolute_error: 0.461133, mean_q: -0.539045
   47882/5000000: episode: 791, duration: 1.440s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean r

   49174/5000000: episode: 816, duration: 1.392s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 3.148 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001221, mean_absolute_error: 0.458755, mean_q: -0.534606
   49240/5000000: episode: 817, duration: 3.172s, episode steps: 66, steps per second: 21, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.379 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001167, mean_absolute_error: 0.460384, mean_q: -0.536311
   49301/5000000: episode: 818, duration: 3.084s, episode steps: 61, steps per second: 20, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.311 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001187, mean_absolute_error: 0.460040, mean_q: -0.536564
   49326/5000000: episode: 819, duration: 1.226s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean r

   50574/5000000: episode: 844, duration: 1.362s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.607 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001249, mean_absolute_error: 0.456263, mean_q: -0.528558
   50604/5000000: episode: 845, duration: 1.410s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.467 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001337, mean_absolute_error: 0.454768, mean_q: -0.526052
   50656/5000000: episode: 846, duration: 2.553s, episode steps: 52, steps per second: 20, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.173 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001303, mean_absolute_error: 0.455975, mean_q: -0.529297
   50716/5000000: episode: 847, duration: 2.969s, episode steps: 60, steps per second: 20, episode reward: -1.000, mean r

   51749/5000000: episode: 872, duration: 1.270s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001301, mean_absolute_error: 0.455449, mean_q: -0.529078
   51781/5000000: episode: 873, duration: 1.531s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.594 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001174, mean_absolute_error: 0.455138, mean_q: -0.530720
   51807/5000000: episode: 874, duration: 1.235s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.115 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001236, mean_absolute_error: 0.456342, mean_q: -0.532717
   51835/5000000: episode: 875, duration: 1.345s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean r

   53360/5000000: episode: 900, duration: 1.518s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.656 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001180, mean_absolute_error: 0.458869, mean_q: -0.530976
   53387/5000000: episode: 901, duration: 1.381s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.148 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001313, mean_absolute_error: 0.458937, mean_q: -0.532618
   53417/5000000: episode: 902, duration: 1.544s, episode steps: 30, steps per second: 19, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.533 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001320, mean_absolute_error: 0.461333, mean_q: -0.534857
   53442/5000000: episode: 903, duration: 1.133s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean r

   54948/5000000: episode: 928, duration: 4.274s, episode steps: 91, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.725 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001191, mean_absolute_error: 0.462179, mean_q: -0.535299
   55006/5000000: episode: 929, duration: 2.793s, episode steps: 58, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.414 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001228, mean_absolute_error: 0.462540, mean_q: -0.535917
   55111/5000000: episode: 930, duration: 5.261s, episode steps: 105, steps per second: 20, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.305 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001264, mean_absolute_error: 0.462376, mean_q: -0.537160
   55310/5000000: episode: 931, duration: 9.173s, episode steps: 199, steps per second: 22, episode reward: -1.000, mean

   56578/5000000: episode: 956, duration: 1.169s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001197, mean_absolute_error: 0.459155, mean_q: -0.530397
   56604/5000000: episode: 957, duration: 1.262s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001172, mean_absolute_error: 0.461391, mean_q: -0.535452
   56631/5000000: episode: 958, duration: 1.290s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.444 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001267, mean_absolute_error: 0.462674, mean_q: -0.534148
   56663/5000000: episode: 959, duration: 1.548s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean r

   58204/5000000: episode: 984, duration: 5.108s, episode steps: 103, steps per second: 20, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.107 [0.000, 5.000], mean observation: 0.060 [0.000, 24.000], loss: 0.001150, mean_absolute_error: 0.461334, mean_q: -0.536460
   58259/5000000: episode: 985, duration: 2.554s, episode steps: 55, steps per second: 22, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.491 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001273, mean_absolute_error: 0.461515, mean_q: -0.537602
   58343/5000000: episode: 986, duration: 4.184s, episode steps: 84, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.405 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001199, mean_absolute_error: 0.460800, mean_q: -0.536233
   58411/5000000: episode: 987, duration: 3.356s, episode steps: 68, steps per second: 20, episode reward: -1.000, mean 

   59875/5000000: episode: 1012, duration: 4.019s, episode steps: 83, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.012 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001217, mean_absolute_error: 0.461355, mean_q: -0.537862
   59913/5000000: episode: 1013, duration: 1.793s, episode steps: 38, steps per second: 21, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.763 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001170, mean_absolute_error: 0.460053, mean_q: -0.536485
   59938/5000000: episode: 1014, duration: 1.223s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.440 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001202, mean_absolute_error: 0.456688, mean_q: -0.532267
   59963/5000000: episode: 1015, duration: 1.255s, episode steps: 25, steps per second: 20, episode reward: -1.000, me

   61431/5000000: episode: 1040, duration: 1.306s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.731 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001205, mean_absolute_error: 0.464770, mean_q: -0.542939
   61461/5000000: episode: 1041, duration: 1.462s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.533 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001225, mean_absolute_error: 0.462120, mean_q: -0.539646
   61563/5000000: episode: 1042, duration: 5.080s, episode steps: 102, steps per second: 20, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.637 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001182, mean_absolute_error: 0.462130, mean_q: -0.540563
   61588/5000000: episode: 1043, duration: 1.263s, episode steps: 25, steps per second: 20, episode reward: -1.000, m

   63177/5000000: episode: 1068, duration: 1.343s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.577 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001227, mean_absolute_error: 0.463647, mean_q: -0.544677
   63221/5000000: episode: 1069, duration: 2.226s, episode steps: 44, steps per second: 20, episode reward: -1.000, mean reward: -0.023 [-1.000, 0.000], mean action: 2.727 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001250, mean_absolute_error: 0.465229, mean_q: -0.544450
   63437/5000000: episode: 1070, duration: 10.992s, episode steps: 216, steps per second: 20, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.495 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001164, mean_absolute_error: 0.463693, mean_q: -0.543100
   63657/5000000: episode: 1071, duration: 10.087s, episode steps: 220, steps per second: 22, episode reward: -1.000

   65058/5000000: episode: 1096, duration: 2.011s, episode steps: 42, steps per second: 21, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.452 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001231, mean_absolute_error: 0.463279, mean_q: -0.543322
   65111/5000000: episode: 1097, duration: 2.771s, episode steps: 53, steps per second: 19, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.943 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001134, mean_absolute_error: 0.462960, mean_q: -0.542236
   65138/5000000: episode: 1098, duration: 1.350s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.963 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001246, mean_absolute_error: 0.462471, mean_q: -0.542549
   65307/5000000: episode: 1099, duration: 8.357s, episode steps: 169, steps per second: 20, episode reward: -1.000, m

   66649/5000000: episode: 1124, duration: 1.388s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.750 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001222, mean_absolute_error: 0.468129, mean_q: -0.551267
   66703/5000000: episode: 1125, duration: 2.557s, episode steps: 54, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.815 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001210, mean_absolute_error: 0.468466, mean_q: -0.550276
   66820/5000000: episode: 1126, duration: 5.793s, episode steps: 117, steps per second: 20, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.299 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001228, mean_absolute_error: 0.468152, mean_q: -0.551037
   66847/5000000: episode: 1127, duration: 1.317s, episode steps: 27, steps per second: 21, episode reward: -1.000, m

   68259/5000000: episode: 1152, duration: 2.926s, episode steps: 58, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.345 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001172, mean_absolute_error: 0.471594, mean_q: -0.555364
   68315/5000000: episode: 1153, duration: 2.643s, episode steps: 56, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.375 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001208, mean_absolute_error: 0.471909, mean_q: -0.556365
   68401/5000000: episode: 1154, duration: 4.063s, episode steps: 86, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.593 [0.000, 5.000], mean observation: 0.075 [0.000, 24.000], loss: 0.001176, mean_absolute_error: 0.472089, mean_q: -0.555692
   68430/5000000: episode: 1155, duration: 1.352s, episode steps: 29, steps per second: 21, episode reward: -1.000, me

   69799/5000000: episode: 1180, duration: 1.168s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.920 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001107, mean_absolute_error: 0.472065, mean_q: -0.557511
   69862/5000000: episode: 1181, duration: 2.853s, episode steps: 63, steps per second: 22, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.302 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001263, mean_absolute_error: 0.473826, mean_q: -0.558962
   69887/5000000: episode: 1182, duration: 1.344s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.240 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001214, mean_absolute_error: 0.474711, mean_q: -0.560220
   69944/5000000: episode: 1183, duration: 2.837s, episode steps: 57, steps per second: 20, episode reward: -1.000, me

   71103/5000000: episode: 1208, duration: 6.823s, episode steps: 144, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.444 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001183, mean_absolute_error: 0.475190, mean_q: -0.559879
   71129/5000000: episode: 1209, duration: 1.190s, episode steps: 26, steps per second: 22, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001177, mean_absolute_error: 0.474696, mean_q: -0.559322
   71157/5000000: episode: 1210, duration: 1.446s, episode steps: 28, steps per second: 19, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.786 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001149, mean_absolute_error: 0.474922, mean_q: -0.560109
   71312/5000000: episode: 1211, duration: 7.395s, episode steps: 155, steps per second: 21, episode reward: -1.000, 

   72372/5000000: episode: 1236, duration: 1.316s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.444 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001256, mean_absolute_error: 0.476639, mean_q: -0.562156
   72428/5000000: episode: 1237, duration: 2.624s, episode steps: 56, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001143, mean_absolute_error: 0.474217, mean_q: -0.559100
   72453/5000000: episode: 1238, duration: 1.223s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.960 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001171, mean_absolute_error: 0.474947, mean_q: -0.560138
   72508/5000000: episode: 1239, duration: 2.711s, episode steps: 55, steps per second: 20, episode reward: -1.000, me

   74041/5000000: episode: 1264, duration: 11.525s, episode steps: 92, steps per second: 8, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.185 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001191, mean_absolute_error: 0.479344, mean_q: -0.564713
   74095/5000000: episode: 1265, duration: 2.445s, episode steps: 54, steps per second: 22, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001203, mean_absolute_error: 0.479044, mean_q: -0.564049
   74150/5000000: episode: 1266, duration: 2.654s, episode steps: 55, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.491 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001218, mean_absolute_error: 0.478620, mean_q: -0.563958
   74180/5000000: episode: 1267, duration: 1.390s, episode steps: 30, steps per second: 22, episode reward: -1.000, me

   76482/5000000: episode: 1292, duration: 8.076s, episode steps: 159, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.535 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001182, mean_absolute_error: 0.476705, mean_q: -0.560900
   76510/5000000: episode: 1293, duration: 1.376s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.357 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001115, mean_absolute_error: 0.476766, mean_q: -0.560221
   76535/5000000: episode: 1294, duration: 1.194s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 1.840 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001275, mean_absolute_error: 0.476301, mean_q: -0.559891
   76598/5000000: episode: 1295, duration: 2.832s, episode steps: 63, steps per second: 22, episode reward: -1.000, m

   78202/5000000: episode: 1320, duration: 2.845s, episode steps: 60, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.933 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001244, mean_absolute_error: 0.475467, mean_q: -0.559037
   78292/5000000: episode: 1321, duration: 4.440s, episode steps: 90, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.411 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001199, mean_absolute_error: 0.474883, mean_q: -0.558141
   78381/5000000: episode: 1322, duration: 4.287s, episode steps: 89, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.584 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001205, mean_absolute_error: 0.475039, mean_q: -0.558602
   78471/5000000: episode: 1323, duration: 4.519s, episode steps: 90, steps per second: 20, episode reward: -1.000, me

   79782/5000000: episode: 1348, duration: 1.265s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.769 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001164, mean_absolute_error: 0.472959, mean_q: -0.554613
   80058/5000000: episode: 1349, duration: 21.921s, episode steps: 276, steps per second: 13, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.591 [0.000, 5.000], mean observation: 0.060 [0.000, 24.000], loss: 0.001194, mean_absolute_error: 0.473191, mean_q: -0.554962
   80129/5000000: episode: 1350, duration: 3.481s, episode steps: 71, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.282 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001222, mean_absolute_error: 0.473031, mean_q: -0.554156
   80228/5000000: episode: 1351, duration: 4.388s, episode steps: 99, steps per second: 23, episode reward: -1.000, 

   81762/5000000: episode: 1376, duration: 7.700s, episode steps: 157, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.662 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001197, mean_absolute_error: 0.471307, mean_q: -0.552927
   81827/5000000: episode: 1377, duration: 3.206s, episode steps: 65, steps per second: 20, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.354 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001259, mean_absolute_error: 0.472890, mean_q: -0.553522
   81984/5000000: episode: 1378, duration: 6.736s, episode steps: 157, steps per second: 23, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.771 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001231, mean_absolute_error: 0.471497, mean_q: -0.552100
   82061/5000000: episode: 1379, duration: 3.627s, episode steps: 77, steps per second: 21, episode reward: -1.000, 

   83432/5000000: episode: 1404, duration: 1.393s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 3.071 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001272, mean_absolute_error: 0.470694, mean_q: -0.552549
   83466/5000000: episode: 1405, duration: 1.645s, episode steps: 34, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.882 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001199, mean_absolute_error: 0.468909, mean_q: -0.550459
   83496/5000000: episode: 1406, duration: 1.577s, episode steps: 30, steps per second: 19, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.867 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001195, mean_absolute_error: 0.470542, mean_q: -0.552718
   83550/5000000: episode: 1407, duration: 2.639s, episode steps: 54, steps per second: 20, episode reward: -1.000, me

   85063/5000000: episode: 1432, duration: 1.942s, episode steps: 39, steps per second: 20, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 3.051 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001296, mean_absolute_error: 0.462501, mean_q: -0.542544
   85145/5000000: episode: 1433, duration: 3.835s, episode steps: 82, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.610 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001239, mean_absolute_error: 0.462581, mean_q: -0.542629
   85181/5000000: episode: 1434, duration: 1.659s, episode steps: 36, steps per second: 22, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.750 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001190, mean_absolute_error: 0.462615, mean_q: -0.543557
   85244/5000000: episode: 1435, duration: 2.919s, episode steps: 63, steps per second: 22, episode reward: -1.000, me

   87150/5000000: episode: 1460, duration: 1.485s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.355 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001197, mean_absolute_error: 0.463412, mean_q: -0.545190
   87215/5000000: episode: 1461, duration: 3.046s, episode steps: 65, steps per second: 21, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.554 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001190, mean_absolute_error: 0.461452, mean_q: -0.542575
   87241/5000000: episode: 1462, duration: 1.277s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.654 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001336, mean_absolute_error: 0.460484, mean_q: -0.540722
   87436/5000000: episode: 1463, duration: 9.255s, episode steps: 195, steps per second: 21, episode reward: -1.000, m

   88929/5000000: episode: 1488, duration: 10.802s, episode steps: 247, steps per second: 23, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.417 [0.000, 5.000], mean observation: 0.056 [0.000, 24.000], loss: 0.001226, mean_absolute_error: 0.461887, mean_q: -0.542861
   88986/5000000: episode: 1489, duration: 2.808s, episode steps: 57, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.123 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001167, mean_absolute_error: 0.462655, mean_q: -0.544602
   89126/5000000: episode: 1490, duration: 6.789s, episode steps: 140, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.421 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.001177, mean_absolute_error: 0.462018, mean_q: -0.543317
   89151/5000000: episode: 1491, duration: 1.227s, episode steps: 25, steps per second: 20, episode reward: -1.000,

   90429/5000000: episode: 1516, duration: 2.680s, episode steps: 56, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.393 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001190, mean_absolute_error: 0.457317, mean_q: -0.537755
   90506/5000000: episode: 1517, duration: 3.607s, episode steps: 77, steps per second: 21, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.442 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001238, mean_absolute_error: 0.456986, mean_q: -0.537154
   90538/5000000: episode: 1518, duration: 1.578s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.469 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001264, mean_absolute_error: 0.457126, mean_q: -0.537268
   90569/5000000: episode: 1519, duration: 1.486s, episode steps: 31, steps per second: 21, episode reward: -1.000, me

   91830/5000000: episode: 1544, duration: 2.912s, episode steps: 61, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.377 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001260, mean_absolute_error: 0.452167, mean_q: -0.531025
   91856/5000000: episode: 1545, duration: 1.231s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.923 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001222, mean_absolute_error: 0.453244, mean_q: -0.532764
   91938/5000000: episode: 1546, duration: 4.005s, episode steps: 82, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.061 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001240, mean_absolute_error: 0.453660, mean_q: -0.533084
   91995/5000000: episode: 1547, duration: 2.832s, episode steps: 57, steps per second: 20, episode reward: -1.000, me

   93309/5000000: episode: 1572, duration: 1.575s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.290 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001249, mean_absolute_error: 0.446719, mean_q: -0.524735
   93334/5000000: episode: 1573, duration: 1.263s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001281, mean_absolute_error: 0.447185, mean_q: -0.525257
   93388/5000000: episode: 1574, duration: 2.598s, episode steps: 54, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.556 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001315, mean_absolute_error: 0.447481, mean_q: -0.525009
   93497/5000000: episode: 1575, duration: 5.412s, episode steps: 109, steps per second: 20, episode reward: -1.000, m

   95187/5000000: episode: 1600, duration: 4.045s, episode steps: 85, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.624 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001307, mean_absolute_error: 0.442705, mean_q: -0.520723
   95214/5000000: episode: 1601, duration: 1.346s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.741 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001334, mean_absolute_error: 0.443802, mean_q: -0.521309
   95295/5000000: episode: 1602, duration: 3.870s, episode steps: 81, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.506 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001289, mean_absolute_error: 0.442278, mean_q: -0.520254
   95324/5000000: episode: 1603, duration: 1.397s, episode steps: 29, steps per second: 21, episode reward: -1.000, me

   97125/5000000: episode: 1628, duration: 4.674s, episode steps: 92, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.478 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001371, mean_absolute_error: 0.434878, mean_q: -0.510520
   97153/5000000: episode: 1629, duration: 1.323s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001318, mean_absolute_error: 0.433950, mean_q: -0.509286
   97190/5000000: episode: 1630, duration: 1.773s, episode steps: 37, steps per second: 21, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.568 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001325, mean_absolute_error: 0.432950, mean_q: -0.507185
   97217/5000000: episode: 1631, duration: 1.338s, episode steps: 27, steps per second: 20, episode reward: -1.000, me

   98272/5000000: episode: 1656, duration: 3.128s, episode steps: 67, steps per second: 21, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.060 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001418, mean_absolute_error: 0.429404, mean_q: -0.502913
   98298/5000000: episode: 1657, duration: 1.263s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.385 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001476, mean_absolute_error: 0.428238, mean_q: -0.502050
   98451/5000000: episode: 1658, duration: 7.516s, episode steps: 153, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.438 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001371, mean_absolute_error: 0.429877, mean_q: -0.503187
   98477/5000000: episode: 1659, duration: 1.205s, episode steps: 26, steps per second: 22, episode reward: -1.000, m

   99875/5000000: episode: 1684, duration: 1.936s, episode steps: 39, steps per second: 20, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.103 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001390, mean_absolute_error: 0.427168, mean_q: -0.500365
   99902/5000000: episode: 1685, duration: 1.275s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.370 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001466, mean_absolute_error: 0.427357, mean_q: -0.499881
  100012/5000000: episode: 1686, duration: 5.028s, episode steps: 110, steps per second: 22, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.609 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001408, mean_absolute_error: 0.428776, mean_q: -0.501530
  100042/5000000: episode: 1687, duration: 1.502s, episode steps: 30, steps per second: 20, episode reward: -1.000, m

  101402/5000000: episode: 1712, duration: 1.399s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.536 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001457, mean_absolute_error: 0.425275, mean_q: -0.496559
  101428/5000000: episode: 1713, duration: 1.286s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.038 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001366, mean_absolute_error: 0.425604, mean_q: -0.498288
  101594/5000000: episode: 1714, duration: 8.163s, episode steps: 166, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.723 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001402, mean_absolute_error: 0.426727, mean_q: -0.499740
  101619/5000000: episode: 1715, duration: 1.185s, episode steps: 25, steps per second: 21, episode reward: -1.000, m

  103036/5000000: episode: 1740, duration: 1.264s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001375, mean_absolute_error: 0.429195, mean_q: -0.502876
  103093/5000000: episode: 1741, duration: 2.776s, episode steps: 57, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.421 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001425, mean_absolute_error: 0.427970, mean_q: -0.501682
  103120/5000000: episode: 1742, duration: 1.383s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001454, mean_absolute_error: 0.429198, mean_q: -0.502036
  103189/5000000: episode: 1743, duration: 3.433s, episode steps: 69, steps per second: 20, episode reward: -1.000, me

  104821/5000000: episode: 1768, duration: 1.569s, episode steps: 33, steps per second: 21, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.424 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001375, mean_absolute_error: 0.431327, mean_q: -0.505720
  104852/5000000: episode: 1769, duration: 1.500s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.645 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001377, mean_absolute_error: 0.427869, mean_q: -0.502297
  104945/5000000: episode: 1770, duration: 4.379s, episode steps: 93, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.366 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001397, mean_absolute_error: 0.430691, mean_q: -0.506081
  104976/5000000: episode: 1771, duration: 1.595s, episode steps: 31, steps per second: 19, episode reward: -1.000, me

  106163/5000000: episode: 1796, duration: 4.146s, episode steps: 85, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.376 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.001425, mean_absolute_error: 0.428821, mean_q: -0.503067
  106219/5000000: episode: 1797, duration: 2.747s, episode steps: 56, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001452, mean_absolute_error: 0.428419, mean_q: -0.502316
  106254/5000000: episode: 1798, duration: 1.766s, episode steps: 35, steps per second: 20, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.429 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001461, mean_absolute_error: 0.428580, mean_q: -0.502016
  106345/5000000: episode: 1799, duration: 4.448s, episode steps: 91, steps per second: 20, episode reward: -1.000, me

  107649/5000000: episode: 1824, duration: 3.171s, episode steps: 65, steps per second: 20, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.015 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001450, mean_absolute_error: 0.429446, mean_q: -0.503811
  107676/5000000: episode: 1825, duration: 1.267s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.370 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001508, mean_absolute_error: 0.429291, mean_q: -0.503618
  107823/5000000: episode: 1826, duration: 7.014s, episode steps: 147, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.395 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001420, mean_absolute_error: 0.427634, mean_q: -0.501665
  107863/5000000: episode: 1827, duration: 1.974s, episode steps: 40, steps per second: 20, episode reward: -1.000, m

  109569/5000000: episode: 1852, duration: 1.776s, episode steps: 38, steps per second: 21, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.053 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001519, mean_absolute_error: 0.424644, mean_q: -0.497582
  109595/5000000: episode: 1853, duration: 1.243s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001341, mean_absolute_error: 0.423974, mean_q: -0.497738
  109624/5000000: episode: 1854, duration: 1.298s, episode steps: 29, steps per second: 22, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001432, mean_absolute_error: 0.424914, mean_q: -0.499312
  109701/5000000: episode: 1855, duration: 3.531s, episode steps: 77, steps per second: 22, episode reward: -1.000, me

  111214/5000000: episode: 1880, duration: 1.208s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.920 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001467, mean_absolute_error: 0.421383, mean_q: -0.493336
  111388/5000000: episode: 1881, duration: 8.249s, episode steps: 174, steps per second: 21, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.638 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001427, mean_absolute_error: 0.422822, mean_q: -0.496455
  111419/5000000: episode: 1882, duration: 1.513s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.194 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001358, mean_absolute_error: 0.421738, mean_q: -0.495313
  111482/5000000: episode: 1883, duration: 3.212s, episode steps: 63, steps per second: 20, episode reward: -1.000, m

  113051/5000000: episode: 1908, duration: 4.169s, episode steps: 83, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.446 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001417, mean_absolute_error: 0.422001, mean_q: -0.495143
  113079/5000000: episode: 1909, duration: 1.332s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.821 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001382, mean_absolute_error: 0.419725, mean_q: -0.492455
  113112/5000000: episode: 1910, duration: 1.651s, episode steps: 33, steps per second: 20, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.273 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001381, mean_absolute_error: 0.420979, mean_q: -0.492889
  113137/5000000: episode: 1911, duration: 1.241s, episode steps: 25, steps per second: 20, episode reward: -1.000, me

  114851/5000000: episode: 1936, duration: 2.760s, episode steps: 54, steps per second: 20, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.630 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001498, mean_absolute_error: 0.421204, mean_q: -0.494216
  114882/5000000: episode: 1937, duration: 1.565s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.613 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001461, mean_absolute_error: 0.421143, mean_q: -0.493494
  114914/5000000: episode: 1938, duration: 1.587s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.312 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001384, mean_absolute_error: 0.419299, mean_q: -0.491745
  114970/5000000: episode: 1939, duration: 2.653s, episode steps: 56, steps per second: 21, episode reward: -1.000, me

  116400/5000000: episode: 1964, duration: 2.929s, episode steps: 61, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.590 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001470, mean_absolute_error: 0.420364, mean_q: -0.492258
  116456/5000000: episode: 1965, duration: 2.659s, episode steps: 56, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.232 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001420, mean_absolute_error: 0.421328, mean_q: -0.494603
  116481/5000000: episode: 1966, duration: 1.313s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001506, mean_absolute_error: 0.419603, mean_q: -0.491464
  116627/5000000: episode: 1967, duration: 7.104s, episode steps: 146, steps per second: 21, episode reward: -1.000, m

  117793/5000000: episode: 1992, duration: 1.235s, episode steps: 27, steps per second: 22, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.296 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001474, mean_absolute_error: 0.417273, mean_q: -0.488345
  117821/5000000: episode: 1993, duration: 1.369s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001362, mean_absolute_error: 0.416515, mean_q: -0.487740
  117846/5000000: episode: 1994, duration: 1.152s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001373, mean_absolute_error: 0.416308, mean_q: -0.487575
  117871/5000000: episode: 1995, duration: 1.186s, episode steps: 25, steps per second: 21, episode reward: -1.000, me

  119659/5000000: episode: 2020, duration: 4.218s, episode steps: 86, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.453 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001451, mean_absolute_error: 0.421400, mean_q: -0.493504
  119729/5000000: episode: 2021, duration: 3.161s, episode steps: 70, steps per second: 22, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.271 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001420, mean_absolute_error: 0.423022, mean_q: -0.496007
  119880/5000000: episode: 2022, duration: 7.645s, episode steps: 151, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.397 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001453, mean_absolute_error: 0.422241, mean_q: -0.494737
  119936/5000000: episode: 2023, duration: 2.745s, episode steps: 56, steps per second: 20, episode reward: -1.000, m

  121316/5000000: episode: 2048, duration: 4.701s, episode steps: 93, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.183 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001499, mean_absolute_error: 0.424906, mean_q: -0.498553
  121371/5000000: episode: 2049, duration: 2.610s, episode steps: 55, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.273 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001408, mean_absolute_error: 0.425752, mean_q: -0.498690
  121399/5000000: episode: 2050, duration: 1.340s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.893 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001424, mean_absolute_error: 0.423831, mean_q: -0.495368
  121565/5000000: episode: 2051, duration: 8.347s, episode steps: 166, steps per second: 20, episode reward: -1.000, m

  123074/5000000: episode: 2076, duration: 1.283s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001406, mean_absolute_error: 0.418394, mean_q: -0.489160
  123121/5000000: episode: 2077, duration: 2.242s, episode steps: 47, steps per second: 21, episode reward: -1.000, mean reward: -0.021 [-1.000, 0.000], mean action: 2.043 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001552, mean_absolute_error: 0.419554, mean_q: -0.490867
  123223/5000000: episode: 2078, duration: 4.890s, episode steps: 102, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.412 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001460, mean_absolute_error: 0.418979, mean_q: -0.490156
  123248/5000000: episode: 2079, duration: 1.242s, episode steps: 25, steps per second: 20, episode reward: -1.000, m

  124836/5000000: episode: 2104, duration: 1.448s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.724 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001673, mean_absolute_error: 0.423124, mean_q: -0.494549
  124918/5000000: episode: 2105, duration: 4.061s, episode steps: 82, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.732 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001495, mean_absolute_error: 0.421218, mean_q: -0.492851
  124947/5000000: episode: 2106, duration: 1.401s, episode steps: 29, steps per second: 21, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.621 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001541, mean_absolute_error: 0.419964, mean_q: -0.490075
  124973/5000000: episode: 2107, duration: 1.338s, episode steps: 26, steps per second: 19, episode reward: -1.000, me

  126603/5000000: episode: 2132, duration: 9.140s, episode steps: 200, steps per second: 22, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.535 [0.000, 5.000], mean observation: 0.058 [0.000, 24.000], loss: 0.001486, mean_absolute_error: 0.425136, mean_q: -0.497340
  126628/5000000: episode: 2133, duration: 1.177s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001520, mean_absolute_error: 0.424231, mean_q: -0.496139
  126672/5000000: episode: 2134, duration: 2.204s, episode steps: 44, steps per second: 20, episode reward: -1.000, mean reward: -0.023 [-1.000, 0.000], mean action: 2.409 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001516, mean_absolute_error: 0.423876, mean_q: -0.494934
  126699/5000000: episode: 2135, duration: 1.255s, episode steps: 27, steps per second: 22, episode reward: -1.000, m

  128185/5000000: episode: 2160, duration: 1.176s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001421, mean_absolute_error: 0.424111, mean_q: -0.497783
  128215/5000000: episode: 2161, duration: 1.436s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.267 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001428, mean_absolute_error: 0.430429, mean_q: -0.504273
  128376/5000000: episode: 2162, duration: 7.947s, episode steps: 161, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.888 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.001505, mean_absolute_error: 0.428627, mean_q: -0.502066
  128427/5000000: episode: 2163, duration: 2.581s, episode steps: 51, steps per second: 20, episode reward: -1.000, m

  131556/5000000: episode: 2222, duration: 3.124s, episode steps: 63, steps per second: 20, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.762 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001371, mean_absolute_error: 0.442233, mean_q: -0.518989
  131581/5000000: episode: 2223, duration: 1.158s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.360 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001377, mean_absolute_error: 0.439788, mean_q: -0.515510
  131732/5000000: episode: 2224, duration: 7.205s, episode steps: 151, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.589 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001425, mean_absolute_error: 0.440879, mean_q: -0.517827
  131779/5000000: episode: 2225, duration: 2.398s, episode steps: 47, steps per second: 20, episode reward: -1.000, m

  132867/5000000: episode: 2250, duration: 3.996s, episode steps: 83, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.639 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001373, mean_absolute_error: 0.445575, mean_q: -0.524167
  132901/5000000: episode: 2251, duration: 1.684s, episode steps: 34, steps per second: 20, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.206 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001376, mean_absolute_error: 0.443279, mean_q: -0.521120
  132927/5000000: episode: 2252, duration: 1.297s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.808 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001418, mean_absolute_error: 0.445626, mean_q: -0.523224
  133011/5000000: episode: 2253, duration: 4.079s, episode steps: 84, steps per second: 21, episode reward: -1.000, me

  134081/5000000: episode: 2278, duration: 11.720s, episode steps: 95, steps per second: 8, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.832 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001343, mean_absolute_error: 0.455288, mean_q: -0.535973
  134173/5000000: episode: 2279, duration: 4.338s, episode steps: 92, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.630 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001318, mean_absolute_error: 0.455262, mean_q: -0.535203
  134199/5000000: episode: 2280, duration: 1.254s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.192 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001374, mean_absolute_error: 0.456403, mean_q: -0.536629
  134338/5000000: episode: 2281, duration: 6.971s, episode steps: 139, steps per second: 20, episode reward: -1.000, m

  135906/5000000: episode: 2306, duration: 1.231s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001255, mean_absolute_error: 0.467955, mean_q: -0.549666
  135931/5000000: episode: 2307, duration: 1.185s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.760 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001147, mean_absolute_error: 0.468509, mean_q: -0.551710
  136013/5000000: episode: 2308, duration: 4.084s, episode steps: 82, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.573 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001329, mean_absolute_error: 0.466880, mean_q: -0.548773
  136051/5000000: episode: 2309, duration: 1.782s, episode steps: 38, steps per second: 21, episode reward: -1.000, me

  137755/5000000: episode: 2334, duration: 2.785s, episode steps: 56, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.339 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001195, mean_absolute_error: 0.475395, mean_q: -0.559652
  137783/5000000: episode: 2335, duration: 1.296s, episode steps: 28, steps per second: 22, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.536 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001301, mean_absolute_error: 0.475872, mean_q: -0.559922
  137861/5000000: episode: 2336, duration: 3.788s, episode steps: 78, steps per second: 21, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.449 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001277, mean_absolute_error: 0.475451, mean_q: -0.559197
  137902/5000000: episode: 2337, duration: 1.992s, episode steps: 41, steps per second: 21, episode reward: -1.000, me

  139177/5000000: episode: 2362, duration: 1.330s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.111 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001276, mean_absolute_error: 0.475932, mean_q: -0.558010
  139327/5000000: episode: 2363, duration: 7.436s, episode steps: 150, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.387 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001211, mean_absolute_error: 0.475672, mean_q: -0.558011
  139353/5000000: episode: 2364, duration: 1.340s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.346 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001262, mean_absolute_error: 0.474320, mean_q: -0.557634
  139436/5000000: episode: 2365, duration: 4.087s, episode steps: 83, steps per second: 20, episode reward: -1.000, m

  141360/5000000: episode: 2390, duration: 1.283s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.462 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001154, mean_absolute_error: 0.475853, mean_q: -0.559606
  141385/5000000: episode: 2391, duration: 1.237s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.400 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001255, mean_absolute_error: 0.476795, mean_q: -0.559149
  141413/5000000: episode: 2392, duration: 1.224s, episode steps: 28, steps per second: 23, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.786 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001187, mean_absolute_error: 0.474300, mean_q: -0.557223
  141444/5000000: episode: 2393, duration: 1.515s, episode steps: 31, steps per second: 20, episode reward: -1.000, me

  143055/5000000: episode: 2418, duration: 4.829s, episode steps: 97, steps per second: 20, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.299 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001235, mean_absolute_error: 0.473704, mean_q: -0.556206
  143265/5000000: episode: 2419, duration: 10.330s, episode steps: 210, steps per second: 20, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.276 [0.000, 5.000], mean observation: 0.061 [0.000, 24.000], loss: 0.001259, mean_absolute_error: 0.472312, mean_q: -0.555198
  143293/5000000: episode: 2420, duration: 1.353s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.429 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001178, mean_absolute_error: 0.470343, mean_q: -0.551912
  143318/5000000: episode: 2421, duration: 1.235s, episode steps: 25, steps per second: 20, episode reward: -1.000, 

  145121/5000000: episode: 2446, duration: 1.194s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.040 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001217, mean_absolute_error: 0.467391, mean_q: -0.549236
  145178/5000000: episode: 2447, duration: 2.736s, episode steps: 57, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.404 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001236, mean_absolute_error: 0.465188, mean_q: -0.547062
  145209/5000000: episode: 2448, duration: 1.435s, episode steps: 31, steps per second: 22, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.516 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001287, mean_absolute_error: 0.466540, mean_q: -0.547913
  145235/5000000: episode: 2449, duration: 1.353s, episode steps: 26, steps per second: 19, episode reward: -1.000, me

  146797/5000000: episode: 2474, duration: 1.412s, episode steps: 29, steps per second: 21, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.621 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001427, mean_absolute_error: 0.458630, mean_q: -0.539940
  146848/5000000: episode: 2475, duration: 2.467s, episode steps: 51, steps per second: 21, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.510 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.001282, mean_absolute_error: 0.460765, mean_q: -0.541969
  146903/5000000: episode: 2476, duration: 2.670s, episode steps: 55, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.564 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.001259, mean_absolute_error: 0.458345, mean_q: -0.540229
  147057/5000000: episode: 2477, duration: 7.618s, episode steps: 154, steps per second: 20, episode reward: -1.000, m

  148381/5000000: episode: 2502, duration: 1.454s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.552 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001274, mean_absolute_error: 0.454141, mean_q: -0.534925
  148408/5000000: episode: 2503, duration: 1.408s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.519 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001415, mean_absolute_error: 0.455949, mean_q: -0.535727
  148434/5000000: episode: 2504, duration: 1.401s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.269 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001290, mean_absolute_error: 0.453983, mean_q: -0.533023
  148459/5000000: episode: 2505, duration: 1.195s, episode steps: 25, steps per second: 21, episode reward: -1.000, me

  150154/5000000: episode: 2530, duration: 1.362s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.259 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001333, mean_absolute_error: 0.449558, mean_q: -0.526471
  150419/5000000: episode: 2531, duration: 12.776s, episode steps: 265, steps per second: 21, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.423 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001363, mean_absolute_error: 0.447075, mean_q: -0.525060
  150473/5000000: episode: 2532, duration: 2.514s, episode steps: 54, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.519 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001376, mean_absolute_error: 0.448656, mean_q: -0.527514
  150498/5000000: episode: 2533, duration: 1.262s, episode steps: 25, steps per second: 20, episode reward: -1.000, 

  152445/5000000: episode: 2558, duration: 2.706s, episode steps: 54, steps per second: 20, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.426 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001542, mean_absolute_error: 0.441731, mean_q: -0.517349
  152479/5000000: episode: 2559, duration: 1.732s, episode steps: 34, steps per second: 20, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.353 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001385, mean_absolute_error: 0.440279, mean_q: -0.514947
  152563/5000000: episode: 2560, duration: 3.889s, episode steps: 84, steps per second: 22, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.583 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001462, mean_absolute_error: 0.440965, mean_q: -0.515958
  152589/5000000: episode: 2561, duration: 1.336s, episode steps: 26, steps per second: 19, episode reward: -1.000, me

  154163/5000000: episode: 2586, duration: 1.210s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.520 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001339, mean_absolute_error: 0.435682, mean_q: -0.510590
  154207/5000000: episode: 2587, duration: 2.096s, episode steps: 44, steps per second: 21, episode reward: -1.000, mean reward: -0.023 [-1.000, 0.000], mean action: 2.250 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001422, mean_absolute_error: 0.436055, mean_q: -0.511532
  154234/5000000: episode: 2588, duration: 1.298s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001503, mean_absolute_error: 0.435325, mean_q: -0.509435
  154259/5000000: episode: 2589, duration: 1.070s, episode steps: 25, steps per second: 23, episode reward: -1.000, me

  155671/5000000: episode: 2614, duration: 1.494s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.438 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001525, mean_absolute_error: 0.432078, mean_q: -0.506433
  155801/5000000: episode: 2615, duration: 6.566s, episode steps: 130, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.715 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001490, mean_absolute_error: 0.430121, mean_q: -0.503555
  155850/5000000: episode: 2616, duration: 2.487s, episode steps: 49, steps per second: 20, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.204 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001530, mean_absolute_error: 0.428943, mean_q: -0.502307
  155885/5000000: episode: 2617, duration: 1.757s, episode steps: 35, steps per second: 20, episode reward: -1.000, m

  157058/5000000: episode: 2642, duration: 1.204s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.840 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001500, mean_absolute_error: 0.427970, mean_q: -0.501460
  157090/5000000: episode: 2643, duration: 1.608s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 1.938 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001524, mean_absolute_error: 0.428180, mean_q: -0.501266
  157115/5000000: episode: 2644, duration: 1.236s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001489, mean_absolute_error: 0.426945, mean_q: -0.500442
  157142/5000000: episode: 2645, duration: 1.274s, episode steps: 27, steps per second: 21, episode reward: -1.000, me

  158719/5000000: episode: 2670, duration: 6.556s, episode steps: 145, steps per second: 22, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.759 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001490, mean_absolute_error: 0.420476, mean_q: -0.492468
  158868/5000000: episode: 2671, duration: 7.109s, episode steps: 149, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.456 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001454, mean_absolute_error: 0.420459, mean_q: -0.491883
  158927/5000000: episode: 2672, duration: 2.796s, episode steps: 59, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.356 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001529, mean_absolute_error: 0.418371, mean_q: -0.489598
  158960/5000000: episode: 2673, duration: 1.607s, episode steps: 33, steps per second: 21, episode reward: -1.000, 

  160325/5000000: episode: 2698, duration: 1.363s, episode steps: 25, steps per second: 18, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001448, mean_absolute_error: 0.411839, mean_q: -0.480865
  160362/5000000: episode: 2699, duration: 1.674s, episode steps: 37, steps per second: 22, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.541 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001708, mean_absolute_error: 0.415104, mean_q: -0.484890
  160388/5000000: episode: 2700, duration: 1.259s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.885 [1.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001507, mean_absolute_error: 0.413033, mean_q: -0.482925
  160413/5000000: episode: 2701, duration: 1.240s, episode steps: 25, steps per second: 20, episode reward: -1.000, me

  162029/5000000: episode: 2726, duration: 3.217s, episode steps: 64, steps per second: 20, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001621, mean_absolute_error: 0.408703, mean_q: -0.476176
  162057/5000000: episode: 2727, duration: 1.299s, episode steps: 28, steps per second: 22, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.643 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001598, mean_absolute_error: 0.408712, mean_q: -0.476809
  162082/5000000: episode: 2728, duration: 1.200s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.440 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001526, mean_absolute_error: 0.408947, mean_q: -0.477279
  162258/5000000: episode: 2729, duration: 8.283s, episode steps: 176, steps per second: 21, episode reward: -1.000, m

  163821/5000000: episode: 2754, duration: 2.629s, episode steps: 55, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.691 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001674, mean_absolute_error: 0.404516, mean_q: -0.471765
  163880/5000000: episode: 2755, duration: 2.979s, episode steps: 59, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.542 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001625, mean_absolute_error: 0.402179, mean_q: -0.469496
  163906/5000000: episode: 2756, duration: 1.348s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.038 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001577, mean_absolute_error: 0.403552, mean_q: -0.470669
  163959/5000000: episode: 2757, duration: 2.469s, episode steps: 53, steps per second: 21, episode reward: -1.000, me

  165721/5000000: episode: 2782, duration: 1.413s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.071 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001769, mean_absolute_error: 0.392791, mean_q: -0.456653
  165928/5000000: episode: 2783, duration: 9.323s, episode steps: 207, steps per second: 22, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.589 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001673, mean_absolute_error: 0.394055, mean_q: -0.457255
  166008/5000000: episode: 2784, duration: 4.227s, episode steps: 80, steps per second: 19, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.513 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001681, mean_absolute_error: 0.392307, mean_q: -0.454850
  166093/5000000: episode: 2785, duration: 4.138s, episode steps: 85, steps per second: 21, episode reward: -1.000, m

  167732/5000000: episode: 2810, duration: 1.361s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.037 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001632, mean_absolute_error: 0.382431, mean_q: -0.441475
  167953/5000000: episode: 2811, duration: 10.920s, episode steps: 221, steps per second: 20, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.534 [0.000, 5.000], mean observation: 0.060 [0.000, 24.000], loss: 0.001752, mean_absolute_error: 0.384953, mean_q: -0.445414
  167980/5000000: episode: 2812, duration: 1.305s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.704 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001790, mean_absolute_error: 0.385022, mean_q: -0.446003
  168081/5000000: episode: 2813, duration: 4.899s, episode steps: 101, steps per second: 21, episode reward: -1.000,

  169248/5000000: episode: 2838, duration: 5.643s, episode steps: 120, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.358 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001750, mean_absolute_error: 0.377163, mean_q: -0.435504
  169275/5000000: episode: 2839, duration: 1.373s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001826, mean_absolute_error: 0.377106, mean_q: -0.435660
  169343/5000000: episode: 2840, duration: 3.358s, episode steps: 68, steps per second: 20, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.250 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001800, mean_absolute_error: 0.376692, mean_q: -0.435090
  169540/5000000: episode: 2841, duration: 9.430s, episode steps: 197, steps per second: 21, episode reward: -1.000, 

  170962/5000000: episode: 2866, duration: 1.194s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 1.920 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001783, mean_absolute_error: 0.371997, mean_q: -0.429248
  171013/5000000: episode: 2867, duration: 2.644s, episode steps: 51, steps per second: 19, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.255 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001828, mean_absolute_error: 0.374223, mean_q: -0.433334
  171068/5000000: episode: 2868, duration: 2.683s, episode steps: 55, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.745 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001812, mean_absolute_error: 0.373628, mean_q: -0.432315
  171093/5000000: episode: 2869, duration: 1.205s, episode steps: 25, steps per second: 21, episode reward: -1.000, me

  172566/5000000: episode: 2894, duration: 3.740s, episode steps: 77, steps per second: 21, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.753 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001903, mean_absolute_error: 0.375731, mean_q: -0.434341
  172631/5000000: episode: 2895, duration: 3.220s, episode steps: 65, steps per second: 20, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.923 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001884, mean_absolute_error: 0.376399, mean_q: -0.435102
  172659/5000000: episode: 2896, duration: 1.529s, episode steps: 28, steps per second: 18, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.857 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001867, mean_absolute_error: 0.375477, mean_q: -0.433723
  172694/5000000: episode: 2897, duration: 1.718s, episode steps: 35, steps per second: 20, episode reward: -1.000, me

  173957/5000000: episode: 2922, duration: 1.318s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.444 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001820, mean_absolute_error: 0.378854, mean_q: -0.438404
  173993/5000000: episode: 2923, duration: 1.811s, episode steps: 36, steps per second: 20, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.306 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001941, mean_absolute_error: 0.376618, mean_q: -0.435931
  174020/5000000: episode: 2924, duration: 1.369s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.556 [1.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001890, mean_absolute_error: 0.377625, mean_q: -0.436330
  174047/5000000: episode: 2925, duration: 1.241s, episode steps: 27, steps per second: 22, episode reward: -1.000, me

  175374/5000000: episode: 2950, duration: 4.544s, episode steps: 91, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.374 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001939, mean_absolute_error: 0.378866, mean_q: -0.437757
  175459/5000000: episode: 2951, duration: 4.192s, episode steps: 85, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001833, mean_absolute_error: 0.377656, mean_q: -0.437486
  175592/5000000: episode: 2952, duration: 6.543s, episode steps: 133, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.271 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001895, mean_absolute_error: 0.379650, mean_q: -0.438661
  175627/5000000: episode: 2953, duration: 1.728s, episode steps: 35, steps per second: 20, episode reward: -1.000, m

  177325/5000000: episode: 2978, duration: 7.094s, episode steps: 141, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.567 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001903, mean_absolute_error: 0.378484, mean_q: -0.437255
  177354/5000000: episode: 2979, duration: 1.512s, episode steps: 29, steps per second: 19, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 3.069 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001848, mean_absolute_error: 0.377118, mean_q: -0.435520
  177418/5000000: episode: 2980, duration: 3.214s, episode steps: 64, steps per second: 20, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.406 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.002027, mean_absolute_error: 0.379046, mean_q: -0.438092
  177443/5000000: episode: 2981, duration: 1.211s, episode steps: 25, steps per second: 21, episode reward: -1.000, m

  178962/5000000: episode: 3006, duration: 8.369s, episode steps: 171, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.275 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001918, mean_absolute_error: 0.376265, mean_q: -0.434267
  178987/5000000: episode: 3007, duration: 1.132s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.040 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001790, mean_absolute_error: 0.374779, mean_q: -0.430768
  179167/5000000: episode: 3008, duration: 9.096s, episode steps: 180, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.439 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001888, mean_absolute_error: 0.374591, mean_q: -0.432114
  179192/5000000: episode: 3009, duration: 1.195s, episode steps: 25, steps per second: 21, episode reward: -1.000, 

  180646/5000000: episode: 3034, duration: 1.294s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.037 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001991, mean_absolute_error: 0.367138, mean_q: -0.421060
  180675/5000000: episode: 3035, duration: 1.424s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.517 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001974, mean_absolute_error: 0.367457, mean_q: -0.421158
  180807/5000000: episode: 3036, duration: 6.429s, episode steps: 132, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.098 [0.000, 5.000], mean observation: 0.061 [0.000, 24.000], loss: 0.001945, mean_absolute_error: 0.368626, mean_q: -0.423643
  181067/5000000: episode: 3037, duration: 12.165s, episode steps: 260, steps per second: 21, episode reward: -1.000,

  182354/5000000: episode: 3062, duration: 1.465s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.387 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002004, mean_absolute_error: 0.369153, mean_q: -0.422851
  182415/5000000: episode: 3063, duration: 2.958s, episode steps: 61, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.770 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002046, mean_absolute_error: 0.365902, mean_q: -0.418296
  182440/5000000: episode: 3064, duration: 1.216s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.920 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001945, mean_absolute_error: 0.365659, mean_q: -0.417547
  182465/5000000: episode: 3065, duration: 1.239s, episode steps: 25, steps per second: 20, episode reward: -1.000, me

  184016/5000000: episode: 3090, duration: 4.621s, episode steps: 83, steps per second: 18, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.627 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001873, mean_absolute_error: 0.367635, mean_q: -0.421001
  184104/5000000: episode: 3091, duration: 4.284s, episode steps: 88, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.511 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.002000, mean_absolute_error: 0.367134, mean_q: -0.420406
  184174/5000000: episode: 3092, duration: 3.430s, episode steps: 70, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.271 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001862, mean_absolute_error: 0.367658, mean_q: -0.421680
  184209/5000000: episode: 3093, duration: 1.742s, episode steps: 35, steps per second: 20, episode reward: -1.000, me

  185762/5000000: episode: 3118, duration: 1.317s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.115 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002008, mean_absolute_error: 0.369626, mean_q: -0.423139
  185792/5000000: episode: 3119, duration: 1.543s, episode steps: 30, steps per second: 19, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.633 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001775, mean_absolute_error: 0.368214, mean_q: -0.421578
  185818/5000000: episode: 3120, duration: 1.311s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.346 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002026, mean_absolute_error: 0.368991, mean_q: -0.420596
  185844/5000000: episode: 3121, duration: 1.325s, episode steps: 26, steps per second: 20, episode reward: -1.000, me

  187297/5000000: episode: 3146, duration: 3.955s, episode steps: 84, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.679 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001867, mean_absolute_error: 0.371411, mean_q: -0.423885
  187325/5000000: episode: 3147, duration: 1.404s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.002063, mean_absolute_error: 0.371840, mean_q: -0.424203
  187431/5000000: episode: 3148, duration: 5.003s, episode steps: 106, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.557 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001816, mean_absolute_error: 0.371146, mean_q: -0.424228
  187738/5000000: episode: 3149, duration: 15.063s, episode steps: 307, steps per second: 20, episode reward: -1.000,

  189059/5000000: episode: 3174, duration: 2.375s, episode steps: 50, steps per second: 21, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001741, mean_absolute_error: 0.376308, mean_q: -0.429438
  189092/5000000: episode: 3175, duration: 1.576s, episode steps: 33, steps per second: 21, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.879 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001757, mean_absolute_error: 0.374191, mean_q: -0.427905
  189139/5000000: episode: 3176, duration: 2.374s, episode steps: 47, steps per second: 20, episode reward: -1.000, mean reward: -0.021 [-1.000, 0.000], mean action: 2.362 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001851, mean_absolute_error: 0.374366, mean_q: -0.427278
  189166/5000000: episode: 3177, duration: 1.421s, episode steps: 27, steps per second: 19, episode reward: -1.000, me

  190393/5000000: episode: 3202, duration: 4.186s, episode steps: 85, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.424 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001787, mean_absolute_error: 0.378377, mean_q: -0.431247
  190425/5000000: episode: 3203, duration: 1.516s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.875 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001862, mean_absolute_error: 0.377861, mean_q: -0.433016
  190508/5000000: episode: 3204, duration: 3.994s, episode steps: 83, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.361 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001915, mean_absolute_error: 0.381620, mean_q: -0.435861
  190570/5000000: episode: 3205, duration: 2.910s, episode steps: 62, steps per second: 21, episode reward: -1.000, me

  191979/5000000: episode: 3230, duration: 1.829s, episode steps: 38, steps per second: 21, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.474 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001961, mean_absolute_error: 0.382267, mean_q: -0.436242
  192004/5000000: episode: 3231, duration: 1.307s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.400 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001830, mean_absolute_error: 0.381659, mean_q: -0.436147
  192031/5000000: episode: 3232, duration: 1.269s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.556 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002151, mean_absolute_error: 0.384978, mean_q: -0.438830
  192112/5000000: episode: 3233, duration: 4.079s, episode steps: 81, steps per second: 20, episode reward: -1.000, me

  193868/5000000: episode: 3258, duration: 1.248s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.840 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001709, mean_absolute_error: 0.388548, mean_q: -0.443332
  193898/5000000: episode: 3259, duration: 1.437s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.633 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001845, mean_absolute_error: 0.387443, mean_q: -0.440794
  193923/5000000: episode: 3260, duration: 1.301s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.880 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001753, mean_absolute_error: 0.389860, mean_q: -0.441835
  193948/5000000: episode: 3261, duration: 1.199s, episode steps: 25, steps per second: 21, episode reward: -1.000, me

  195494/5000000: episode: 3286, duration: 1.693s, episode steps: 35, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.257 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001990, mean_absolute_error: 0.395139, mean_q: -0.444914
  195527/5000000: episode: 3287, duration: 1.579s, episode steps: 33, steps per second: 21, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.485 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001881, mean_absolute_error: 0.391065, mean_q: -0.439569
  195552/5000000: episode: 3288, duration: 1.175s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.160 [1.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001729, mean_absolute_error: 0.393000, mean_q: -0.445765
  195663/5000000: episode: 3289, duration: 5.413s, episode steps: 111, steps per second: 21, episode reward: -1.000, m

  197355/5000000: episode: 3314, duration: 1.311s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001863, mean_absolute_error: 0.407663, mean_q: -0.457171
  197598/5000000: episode: 3315, duration: 11.622s, episode steps: 243, steps per second: 21, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.453 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001803, mean_absolute_error: 0.404484, mean_q: -0.456082
  197693/5000000: episode: 3316, duration: 4.708s, episode steps: 95, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.442 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001868, mean_absolute_error: 0.405035, mean_q: -0.459471
  197774/5000000: episode: 3317, duration: 3.817s, episode steps: 81, steps per second: 21, episode reward: -1.000, 

  198994/5000000: episode: 3342, duration: 4.369s, episode steps: 86, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.919 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001818, mean_absolute_error: 0.410934, mean_q: -0.468005
  199055/5000000: episode: 3343, duration: 2.864s, episode steps: 61, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.426 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001682, mean_absolute_error: 0.408728, mean_q: -0.466415
  199095/5000000: episode: 3344, duration: 1.993s, episode steps: 40, steps per second: 20, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.375 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001816, mean_absolute_error: 0.411471, mean_q: -0.468742
  199128/5000000: episode: 3345, duration: 1.565s, episode steps: 33, steps per second: 21, episode reward: -1.000, me

  200700/5000000: episode: 3370, duration: 6.845s, episode steps: 142, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.634 [0.000, 5.000], mean observation: 0.078 [0.000, 24.000], loss: 0.001737, mean_absolute_error: 0.417133, mean_q: -0.476727
  201000/5000000: episode: 3371, duration: 15.360s, episode steps: 300, steps per second: 20, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.657 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001690, mean_absolute_error: 0.415939, mean_q: -0.480282
  201025/5000000: episode: 3372, duration: 1.255s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.120 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001807, mean_absolute_error: 0.415947, mean_q: -0.483865
  201060/5000000: episode: 3373, duration: 1.819s, episode steps: 35, steps per second: 19, episode reward: -1.000,

  202761/5000000: episode: 3398, duration: 1.262s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.423 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001537, mean_absolute_error: 0.416452, mean_q: -0.486625
  202788/5000000: episode: 3399, duration: 1.311s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.556 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001673, mean_absolute_error: 0.420019, mean_q: -0.490969
  202816/5000000: episode: 3400, duration: 1.392s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.714 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001746, mean_absolute_error: 0.415203, mean_q: -0.484900
  202850/5000000: episode: 3401, duration: 1.653s, episode steps: 34, steps per second: 21, episode reward: -1.000, me

  204076/5000000: episode: 3426, duration: 4.077s, episode steps: 83, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.494 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001598, mean_absolute_error: 0.419026, mean_q: -0.489664
  204126/5000000: episode: 3427, duration: 2.429s, episode steps: 50, steps per second: 21, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.380 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001635, mean_absolute_error: 0.419025, mean_q: -0.490112
  204252/5000000: episode: 3428, duration: 6.274s, episode steps: 126, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.452 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001586, mean_absolute_error: 0.419378, mean_q: -0.489768
  204308/5000000: episode: 3429, duration: 2.641s, episode steps: 56, steps per second: 21, episode reward: -1.000, m

  205289/5000000: episode: 3454, duration: 1.297s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.080 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001600, mean_absolute_error: 0.417970, mean_q: -0.488166
  205314/5000000: episode: 3455, duration: 1.214s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.280 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001580, mean_absolute_error: 0.420808, mean_q: -0.492423
  205455/5000000: episode: 3456, duration: 6.961s, episode steps: 141, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.355 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001586, mean_absolute_error: 0.420743, mean_q: -0.491829
  205509/5000000: episode: 3457, duration: 2.676s, episode steps: 54, steps per second: 20, episode reward: -1.000, m

  206562/5000000: episode: 3482, duration: 1.847s, episode steps: 37, steps per second: 20, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.811 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001515, mean_absolute_error: 0.422929, mean_q: -0.495079
  206753/5000000: episode: 3483, duration: 9.793s, episode steps: 191, steps per second: 20, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.675 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001567, mean_absolute_error: 0.422036, mean_q: -0.493169
  206778/5000000: episode: 3484, duration: 1.228s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.680 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001653, mean_absolute_error: 0.422654, mean_q: -0.494051
  206804/5000000: episode: 3485, duration: 1.370s, episode steps: 26, steps per second: 19, episode reward: -1.000, m

  207759/5000000: episode: 3510, duration: 1.363s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 3.037 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001513, mean_absolute_error: 0.423655, mean_q: -0.496016
  207889/5000000: episode: 3511, duration: 6.355s, episode steps: 130, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.569 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001492, mean_absolute_error: 0.423438, mean_q: -0.495561
  207984/5000000: episode: 3512, duration: 4.756s, episode steps: 95, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.674 [0.000, 5.000], mean observation: 0.059 [0.000, 24.000], loss: 0.001562, mean_absolute_error: 0.424821, mean_q: -0.497027
  208026/5000000: episode: 3513, duration: 2.118s, episode steps: 42, steps per second: 20, episode reward: -1.000, m

  209679/5000000: episode: 3538, duration: 1.758s, episode steps: 36, steps per second: 20, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.222 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001544, mean_absolute_error: 0.425575, mean_q: -0.497535
  209757/5000000: episode: 3539, duration: 3.973s, episode steps: 78, steps per second: 20, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.769 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001559, mean_absolute_error: 0.424808, mean_q: -0.497185
  209847/5000000: episode: 3540, duration: 4.491s, episode steps: 90, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001499, mean_absolute_error: 0.426075, mean_q: -0.498716
  209873/5000000: episode: 3541, duration: 1.267s, episode steps: 26, steps per second: 21, episode reward: -1.000, me

  211446/5000000: episode: 3566, duration: 6.685s, episode steps: 140, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.364 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001463, mean_absolute_error: 0.426523, mean_q: -0.498695
  211475/5000000: episode: 3567, duration: 1.559s, episode steps: 29, steps per second: 19, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.828 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001525, mean_absolute_error: 0.426027, mean_q: -0.497752
  211575/5000000: episode: 3568, duration: 4.950s, episode steps: 100, steps per second: 20, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.450 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001477, mean_absolute_error: 0.424581, mean_q: -0.496499
  211652/5000000: episode: 3569, duration: 3.779s, episode steps: 77, steps per second: 20, episode reward: -1.000, 

  212977/5000000: episode: 3594, duration: 1.464s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.276 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001593, mean_absolute_error: 0.421204, mean_q: -0.491397
  213007/5000000: episode: 3595, duration: 1.454s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.700 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001552, mean_absolute_error: 0.419968, mean_q: -0.490565
  213132/5000000: episode: 3596, duration: 5.987s, episode steps: 125, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.416 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001485, mean_absolute_error: 0.420031, mean_q: -0.491387
  213165/5000000: episode: 3597, duration: 1.496s, episode steps: 33, steps per second: 22, episode reward: -1.000, m

  214580/5000000: episode: 3622, duration: 1.397s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.786 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001456, mean_absolute_error: 0.420735, mean_q: -0.492620
  214608/5000000: episode: 3623, duration: 1.462s, episode steps: 28, steps per second: 19, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.536 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001578, mean_absolute_error: 0.421069, mean_q: -0.492781
  214645/5000000: episode: 3624, duration: 1.925s, episode steps: 37, steps per second: 19, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.270 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001582, mean_absolute_error: 0.422652, mean_q: -0.494405
  214714/5000000: episode: 3625, duration: 3.272s, episode steps: 69, steps per second: 21, episode reward: -1.000, me

  215725/5000000: episode: 3650, duration: 2.866s, episode steps: 59, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.186 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001475, mean_absolute_error: 0.425465, mean_q: -0.498261
  215911/5000000: episode: 3651, duration: 9.295s, episode steps: 186, steps per second: 20, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.468 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001511, mean_absolute_error: 0.426452, mean_q: -0.499681
  215937/5000000: episode: 3652, duration: 1.258s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.731 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001699, mean_absolute_error: 0.427461, mean_q: -0.500994
  215962/5000000: episode: 3653, duration: 1.247s, episode steps: 25, steps per second: 20, episode reward: -1.000, m

  217484/5000000: episode: 3678, duration: 1.188s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.280 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001400, mean_absolute_error: 0.435779, mean_q: -0.510229
  217685/5000000: episode: 3679, duration: 9.784s, episode steps: 201, steps per second: 21, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.388 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001438, mean_absolute_error: 0.435871, mean_q: -0.511002
  217744/5000000: episode: 3680, duration: 3.006s, episode steps: 59, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.932 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001510, mean_absolute_error: 0.436549, mean_q: -0.512730
  217799/5000000: episode: 3681, duration: 2.646s, episode steps: 55, steps per second: 21, episode reward: -1.000, m

  219596/5000000: episode: 3706, duration: 1.390s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 3.179 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001456, mean_absolute_error: 0.429481, mean_q: -0.502670
  219688/5000000: episode: 3707, duration: 4.603s, episode steps: 92, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.228 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001495, mean_absolute_error: 0.430142, mean_q: -0.503892
  219718/5000000: episode: 3708, duration: 1.427s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.367 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001630, mean_absolute_error: 0.430790, mean_q: -0.504747
  219748/5000000: episode: 3709, duration: 1.544s, episode steps: 30, steps per second: 19, episode reward: -1.000, me

  221393/5000000: episode: 3734, duration: 1.469s, episode steps: 30, steps per second: 20, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.067 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001573, mean_absolute_error: 0.421251, mean_q: -0.492803
  221596/5000000: episode: 3735, duration: 9.498s, episode steps: 203, steps per second: 21, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.443 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001471, mean_absolute_error: 0.423217, mean_q: -0.495243
  221695/5000000: episode: 3736, duration: 4.941s, episode steps: 99, steps per second: 20, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.525 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001518, mean_absolute_error: 0.422092, mean_q: -0.493490
  221785/5000000: episode: 3737, duration: 4.477s, episode steps: 90, steps per second: 20, episode reward: -1.000, m

  223377/5000000: episode: 3762, duration: 1.622s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001500, mean_absolute_error: 0.418171, mean_q: -0.488995
  223611/5000000: episode: 3763, duration: 11.562s, episode steps: 234, steps per second: 20, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.556 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001585, mean_absolute_error: 0.416703, mean_q: -0.486942
  223636/5000000: episode: 3764, duration: 1.170s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001619, mean_absolute_error: 0.416702, mean_q: -0.486242
  223661/5000000: episode: 3765, duration: 1.248s, episode steps: 25, steps per second: 20, episode reward: -1.000, 

  226035/5000000: episode: 3790, duration: 1.671s, episode steps: 35, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.257 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001611, mean_absolute_error: 0.402160, mean_q: -0.468365
  226172/5000000: episode: 3791, duration: 6.851s, episode steps: 137, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.766 [0.000, 5.000], mean observation: 0.061 [0.000, 24.000], loss: 0.001740, mean_absolute_error: 0.402407, mean_q: -0.468289
  226202/5000000: episode: 3792, duration: 1.507s, episode steps: 30, steps per second: 20, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.533 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001740, mean_absolute_error: 0.402573, mean_q: -0.468844
  226236/5000000: episode: 3793, duration: 1.583s, episode steps: 34, steps per second: 21, episode reward: -1.000, m

  227577/5000000: episode: 3818, duration: 1.580s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.906 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001701, mean_absolute_error: 0.397546, mean_q: -0.462535
  227801/5000000: episode: 3819, duration: 11.156s, episode steps: 224, steps per second: 20, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.344 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001714, mean_absolute_error: 0.396274, mean_q: -0.460616
  227857/5000000: episode: 3820, duration: 2.581s, episode steps: 56, steps per second: 22, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 1.911 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001763, mean_absolute_error: 0.396215, mean_q: -0.460642
  227891/5000000: episode: 3821, duration: 1.674s, episode steps: 34, steps per second: 20, episode reward: -1.000, 

  229886/5000000: episode: 3846, duration: 1.582s, episode steps: 29, steps per second: 18, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.345 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001796, mean_absolute_error: 0.384148, mean_q: -0.445762
  229916/5000000: episode: 3847, duration: 1.454s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.467 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001831, mean_absolute_error: 0.386469, mean_q: -0.447503
  229957/5000000: episode: 3848, duration: 2.053s, episode steps: 41, steps per second: 20, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.146 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001858, mean_absolute_error: 0.386586, mean_q: -0.448691
  229991/5000000: episode: 3849, duration: 1.672s, episode steps: 34, steps per second: 20, episode reward: -1.000, me

  231821/5000000: episode: 3874, duration: 1.293s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.346 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001889, mean_absolute_error: 0.373604, mean_q: -0.433115
  231887/5000000: episode: 3875, duration: 3.245s, episode steps: 66, steps per second: 20, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.379 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001979, mean_absolute_error: 0.372032, mean_q: -0.429940
  231983/5000000: episode: 3876, duration: 4.733s, episode steps: 96, steps per second: 20, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.521 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001871, mean_absolute_error: 0.370183, mean_q: -0.427136
  232058/5000000: episode: 3877, duration: 3.800s, episode steps: 75, steps per second: 20, episode reward: -1.000, me

  233591/5000000: episode: 3902, duration: 1.351s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.154 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002091, mean_absolute_error: 0.363369, mean_q: -0.417847
  233661/5000000: episode: 3903, duration: 3.541s, episode steps: 70, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.271 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002001, mean_absolute_error: 0.359451, mean_q: -0.413530
  233688/5000000: episode: 3904, duration: 1.369s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.630 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001908, mean_absolute_error: 0.358538, mean_q: -0.413125
  233762/5000000: episode: 3905, duration: 3.551s, episode steps: 74, steps per second: 21, episode reward: -1.000, me

  235024/5000000: episode: 3930, duration: 1.354s, episode steps: 29, steps per second: 21, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.897 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002090, mean_absolute_error: 0.355157, mean_q: -0.407280
  235251/5000000: episode: 3931, duration: 10.919s, episode steps: 227, steps per second: 21, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.577 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002057, mean_absolute_error: 0.353100, mean_q: -0.404181
  235281/5000000: episode: 3932, duration: 1.379s, episode steps: 30, steps per second: 22, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002067, mean_absolute_error: 0.354005, mean_q: -0.406870
  235350/5000000: episode: 3933, duration: 3.400s, episode steps: 69, steps per second: 20, episode reward: -1.000, 

  236619/5000000: episode: 3958, duration: 3.723s, episode steps: 73, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.260 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002118, mean_absolute_error: 0.343012, mean_q: -0.390419
  236651/5000000: episode: 3959, duration: 1.591s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.562 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.002114, mean_absolute_error: 0.342305, mean_q: -0.389831
  236677/5000000: episode: 3960, duration: 1.319s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.385 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002257, mean_absolute_error: 0.344996, mean_q: -0.393725
  236731/5000000: episode: 3961, duration: 2.572s, episode steps: 54, steps per second: 21, episode reward: -1.000, me

  238545/5000000: episode: 3986, duration: 9.974s, episode steps: 202, steps per second: 20, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.366 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.002182, mean_absolute_error: 0.335973, mean_q: -0.382137
  238650/5000000: episode: 3987, duration: 4.952s, episode steps: 105, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.352 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002205, mean_absolute_error: 0.335222, mean_q: -0.380454
  238681/5000000: episode: 3988, duration: 1.551s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.032 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002343, mean_absolute_error: 0.333742, mean_q: -0.379669
  238712/5000000: episode: 3989, duration: 1.474s, episode steps: 31, steps per second: 21, episode reward: -1.000, 

  240525/5000000: episode: 4014, duration: 1.542s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.452 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002368, mean_absolute_error: 0.328626, mean_q: -0.371090
  240644/5000000: episode: 4015, duration: 5.816s, episode steps: 119, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.487 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002308, mean_absolute_error: 0.327484, mean_q: -0.370178
  240714/5000000: episode: 4016, duration: 3.437s, episode steps: 70, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.514 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002482, mean_absolute_error: 0.328835, mean_q: -0.371028
  240749/5000000: episode: 4017, duration: 1.684s, episode steps: 35, steps per second: 21, episode reward: -1.000, m

  242235/5000000: episode: 4042, duration: 2.904s, episode steps: 59, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.475 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.002328, mean_absolute_error: 0.324370, mean_q: -0.365270
  242291/5000000: episode: 4043, duration: 2.709s, episode steps: 56, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.714 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002311, mean_absolute_error: 0.324336, mean_q: -0.366497
  242318/5000000: episode: 4044, duration: 1.412s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002166, mean_absolute_error: 0.321950, mean_q: -0.364588
  242344/5000000: episode: 4045, duration: 1.306s, episode steps: 26, steps per second: 20, episode reward: -1.000, me

  243310/5000000: episode: 4070, duration: 1.883s, episode steps: 39, steps per second: 21, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.231 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002419, mean_absolute_error: 0.321838, mean_q: -0.362650
  243337/5000000: episode: 4071, duration: 1.306s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.222 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002204, mean_absolute_error: 0.320668, mean_q: -0.360693
  243364/5000000: episode: 4072, duration: 1.258s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.519 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002257, mean_absolute_error: 0.321312, mean_q: -0.362281
  243401/5000000: episode: 4073, duration: 1.760s, episode steps: 37, steps per second: 21, episode reward: -1.000, me

  244802/5000000: episode: 4098, duration: 1.247s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.240 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002429, mean_absolute_error: 0.321723, mean_q: -0.362045
  244828/5000000: episode: 4099, duration: 1.299s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.038 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002418, mean_absolute_error: 0.324578, mean_q: -0.365676
  244879/5000000: episode: 4100, duration: 2.532s, episode steps: 51, steps per second: 20, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.196 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002495, mean_absolute_error: 0.324627, mean_q: -0.366372
  244937/5000000: episode: 4101, duration: 3.016s, episode steps: 58, steps per second: 19, episode reward: -1.000, me

  246033/5000000: episode: 4126, duration: 1.330s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.192 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002359, mean_absolute_error: 0.332083, mean_q: -0.375748
  246093/5000000: episode: 4127, duration: 2.946s, episode steps: 60, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.383 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002238, mean_absolute_error: 0.326200, mean_q: -0.367906
  246365/5000000: episode: 4128, duration: 12.632s, episode steps: 272, steps per second: 22, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.445 [0.000, 5.000], mean observation: 0.059 [0.000, 24.000], loss: 0.002306, mean_absolute_error: 0.328606, mean_q: -0.371688
  246393/5000000: episode: 4129, duration: 1.361s, episode steps: 28, steps per second: 21, episode reward: -1.000, 

  247694/5000000: episode: 4154, duration: 3.074s, episode steps: 63, steps per second: 20, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.002189, mean_absolute_error: 0.331676, mean_q: -0.375306
  247732/5000000: episode: 4155, duration: 1.914s, episode steps: 38, steps per second: 20, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.158 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002241, mean_absolute_error: 0.333846, mean_q: -0.379164
  247757/5000000: episode: 4156, duration: 1.219s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002135, mean_absolute_error: 0.330890, mean_q: -0.376678
  247807/5000000: episode: 4157, duration: 2.383s, episode steps: 50, steps per second: 21, episode reward: -1.000, me

  249030/5000000: episode: 4182, duration: 4.063s, episode steps: 81, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.123 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.002320, mean_absolute_error: 0.336653, mean_q: -0.382046
  249062/5000000: episode: 4183, duration: 1.468s, episode steps: 32, steps per second: 22, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.531 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.002211, mean_absolute_error: 0.334648, mean_q: -0.379898
  249260/5000000: episode: 4184, duration: 9.902s, episode steps: 198, steps per second: 20, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.646 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.002268, mean_absolute_error: 0.336976, mean_q: -0.382353
  249285/5000000: episode: 4185, duration: 1.285s, episode steps: 25, steps per second: 19, episode reward: -1.000, m

  251122/5000000: episode: 4210, duration: 1.244s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.720 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002173, mean_absolute_error: 0.346883, mean_q: -0.397135
  251152/5000000: episode: 4211, duration: 1.394s, episode steps: 30, steps per second: 22, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.467 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002385, mean_absolute_error: 0.349434, mean_q: -0.399493
  251205/5000000: episode: 4212, duration: 2.601s, episode steps: 53, steps per second: 20, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.717 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002177, mean_absolute_error: 0.349034, mean_q: -0.399452
  251466/5000000: episode: 4213, duration: 12.503s, episode steps: 261, steps per second: 21, episode reward: -1.000, 

  252887/5000000: episode: 4238, duration: 2.660s, episode steps: 53, steps per second: 20, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.528 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002119, mean_absolute_error: 0.352279, mean_q: -0.402080
  252982/5000000: episode: 4239, duration: 4.571s, episode steps: 95, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.611 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.002124, mean_absolute_error: 0.352929, mean_q: -0.402956
  253013/5000000: episode: 4240, duration: 1.577s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.290 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002209, mean_absolute_error: 0.352200, mean_q: -0.401234
  253039/5000000: episode: 4241, duration: 1.208s, episode steps: 26, steps per second: 22, episode reward: -1.000, me

  254348/5000000: episode: 4266, duration: 2.792s, episode steps: 59, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.356 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.002091, mean_absolute_error: 0.357181, mean_q: -0.408401
  254418/5000000: episode: 4267, duration: 3.310s, episode steps: 70, steps per second: 21, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.586 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.002112, mean_absolute_error: 0.360105, mean_q: -0.412364
  254449/5000000: episode: 4268, duration: 1.543s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.419 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.002039, mean_absolute_error: 0.356355, mean_q: -0.408022
  254475/5000000: episode: 4269, duration: 1.244s, episode steps: 26, steps per second: 21, episode reward: -1.000, me

  256036/5000000: episode: 4294, duration: 3.952s, episode steps: 86, steps per second: 22, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.477 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002031, mean_absolute_error: 0.365379, mean_q: -0.420636
  256064/5000000: episode: 4295, duration: 1.391s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002122, mean_absolute_error: 0.364756, mean_q: -0.419853
  256089/5000000: episode: 4296, duration: 1.267s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002105, mean_absolute_error: 0.366748, mean_q: -0.422898
  256128/5000000: episode: 4297, duration: 1.940s, episode steps: 39, steps per second: 20, episode reward: -1.000, me

  257424/5000000: episode: 4322, duration: 2.564s, episode steps: 54, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001976, mean_absolute_error: 0.367999, mean_q: -0.423584
  257459/5000000: episode: 4323, duration: 1.734s, episode steps: 35, steps per second: 20, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.429 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001991, mean_absolute_error: 0.370326, mean_q: -0.427354
  257486/5000000: episode: 4324, duration: 1.337s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.704 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001898, mean_absolute_error: 0.366968, mean_q: -0.422944
  257516/5000000: episode: 4325, duration: 1.426s, episode steps: 30, steps per second: 21, episode reward: -1.000, me

  258936/5000000: episode: 4350, duration: 1.291s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.385 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001999, mean_absolute_error: 0.372742, mean_q: -0.431005
  258998/5000000: episode: 4351, duration: 2.995s, episode steps: 62, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.258 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001983, mean_absolute_error: 0.374295, mean_q: -0.432012
  259058/5000000: episode: 4352, duration: 2.775s, episode steps: 60, steps per second: 22, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.200 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001999, mean_absolute_error: 0.375538, mean_q: -0.433338
  259084/5000000: episode: 4353, duration: 1.251s, episode steps: 26, steps per second: 21, episode reward: -1.000, me

  260149/5000000: episode: 4378, duration: 1.495s, episode steps: 29, steps per second: 19, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.034 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001877, mean_absolute_error: 0.378719, mean_q: -0.438252
  260174/5000000: episode: 4379, duration: 1.234s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.920 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001938, mean_absolute_error: 0.379188, mean_q: -0.438909
  260258/5000000: episode: 4380, duration: 4.119s, episode steps: 84, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.512 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001797, mean_absolute_error: 0.378382, mean_q: -0.437669
  260293/5000000: episode: 4381, duration: 1.821s, episode steps: 35, steps per second: 19, episode reward: -1.000, me

  261532/5000000: episode: 4406, duration: 1.249s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.769 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001724, mean_absolute_error: 0.385554, mean_q: -0.446945
  261602/5000000: episode: 4407, duration: 3.561s, episode steps: 70, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.186 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001742, mean_absolute_error: 0.385116, mean_q: -0.446869
  261679/5000000: episode: 4408, duration: 3.640s, episode steps: 77, steps per second: 21, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.299 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001826, mean_absolute_error: 0.386625, mean_q: -0.448967
  261706/5000000: episode: 4409, duration: 1.233s, episode steps: 27, steps per second: 22, episode reward: -1.000, me

  262969/5000000: episode: 4434, duration: 3.292s, episode steps: 69, steps per second: 21, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.638 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001802, mean_absolute_error: 0.390181, mean_q: -0.454075
  262994/5000000: episode: 4435, duration: 1.255s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.880 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001870, mean_absolute_error: 0.389099, mean_q: -0.452639
  263022/5000000: episode: 4436, duration: 1.437s, episode steps: 28, steps per second: 19, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.429 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001783, mean_absolute_error: 0.391812, mean_q: -0.456172
  263140/5000000: episode: 4437, duration: 5.889s, episode steps: 118, steps per second: 20, episode reward: -1.000, m

  264663/5000000: episode: 4462, duration: 4.219s, episode steps: 87, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.586 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001804, mean_absolute_error: 0.394651, mean_q: -0.459103
  264690/5000000: episode: 4463, duration: 1.344s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.185 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001704, mean_absolute_error: 0.394047, mean_q: -0.458732
  264715/5000000: episode: 4464, duration: 1.258s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001893, mean_absolute_error: 0.397930, mean_q: -0.462372
  264741/5000000: episode: 4465, duration: 1.286s, episode steps: 26, steps per second: 20, episode reward: -1.000, me

  266221/5000000: episode: 4490, duration: 3.891s, episode steps: 79, steps per second: 20, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.468 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001747, mean_absolute_error: 0.397707, mean_q: -0.463464
  266246/5000000: episode: 4491, duration: 1.270s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.240 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001743, mean_absolute_error: 0.396529, mean_q: -0.462310
  266271/5000000: episode: 4492, duration: 1.152s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001590, mean_absolute_error: 0.395965, mean_q: -0.461857
  266469/5000000: episode: 4493, duration: 9.418s, episode steps: 198, steps per second: 21, episode reward: -1.000, m

  267732/5000000: episode: 4518, duration: 1.218s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.720 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001662, mean_absolute_error: 0.404999, mean_q: -0.473280
  267766/5000000: episode: 4519, duration: 1.655s, episode steps: 34, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.147 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001617, mean_absolute_error: 0.405310, mean_q: -0.474119
  267799/5000000: episode: 4520, duration: 1.587s, episode steps: 33, steps per second: 21, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.182 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001622, mean_absolute_error: 0.406124, mean_q: -0.474583
  267898/5000000: episode: 4521, duration: 4.815s, episode steps: 99, steps per second: 21, episode reward: -1.000, me

  269623/5000000: episode: 4546, duration: 1.934s, episode steps: 37, steps per second: 19, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.514 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001486, mean_absolute_error: 0.416374, mean_q: -0.488116
  269750/5000000: episode: 4547, duration: 6.320s, episode steps: 127, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.512 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001581, mean_absolute_error: 0.417679, mean_q: -0.489412
  269784/5000000: episode: 4548, duration: 1.705s, episode steps: 34, steps per second: 20, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.735 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001474, mean_absolute_error: 0.416505, mean_q: -0.487900
  269900/5000000: episode: 4549, duration: 5.513s, episode steps: 116, steps per second: 21, episode reward: -1.000, 

  271282/5000000: episode: 4574, duration: 1.519s, episode steps: 33, steps per second: 22, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.242 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001533, mean_absolute_error: 0.421245, mean_q: -0.494615
  271309/5000000: episode: 4575, duration: 1.324s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 3.111 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001529, mean_absolute_error: 0.422693, mean_q: -0.495731
  271387/5000000: episode: 4576, duration: 4.092s, episode steps: 78, steps per second: 19, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.462 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001517, mean_absolute_error: 0.422844, mean_q: -0.496608
  271723/5000000: episode: 4577, duration: 15.759s, episode steps: 336, steps per second: 21, episode reward: -1.000, 

  273100/5000000: episode: 4602, duration: 1.488s, episode steps: 27, steps per second: 18, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.444 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001549, mean_absolute_error: 0.430431, mean_q: -0.505994
  273160/5000000: episode: 4603, duration: 3.100s, episode steps: 60, steps per second: 19, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.383 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001449, mean_absolute_error: 0.431253, mean_q: -0.507510
  273187/5000000: episode: 4604, duration: 1.287s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.037 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001594, mean_absolute_error: 0.431223, mean_q: -0.506733
  273216/5000000: episode: 4605, duration: 1.444s, episode steps: 29, steps per second: 20, episode reward: -1.000, me

  274840/5000000: episode: 4630, duration: 1.547s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.438 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001346, mean_absolute_error: 0.440305, mean_q: -0.517767
  274872/5000000: episode: 4631, duration: 1.536s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001396, mean_absolute_error: 0.443528, mean_q: -0.521729
  274987/5000000: episode: 4632, duration: 5.414s, episode steps: 115, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.391 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001441, mean_absolute_error: 0.441817, mean_q: -0.519750
  275016/5000000: episode: 4633, duration: 1.507s, episode steps: 29, steps per second: 19, episode reward: -1.000, m

  276329/5000000: episode: 4658, duration: 1.761s, episode steps: 37, steps per second: 21, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.108 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001389, mean_absolute_error: 0.442822, mean_q: -0.520493
  276355/5000000: episode: 4659, duration: 1.195s, episode steps: 26, steps per second: 22, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001369, mean_absolute_error: 0.443930, mean_q: -0.522368
  276447/5000000: episode: 4660, duration: 4.312s, episode steps: 92, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.543 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001413, mean_absolute_error: 0.442090, mean_q: -0.519475
  276477/5000000: episode: 4661, duration: 1.516s, episode steps: 30, steps per second: 20, episode reward: -1.000, me

  277958/5000000: episode: 4686, duration: 6.138s, episode steps: 129, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.651 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001360, mean_absolute_error: 0.447227, mean_q: -0.526146
  277984/5000000: episode: 4687, duration: 1.210s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.923 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001392, mean_absolute_error: 0.445755, mean_q: -0.524312
  278012/5000000: episode: 4688, duration: 10.789s, episode steps: 28, steps per second: 3, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.071 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001358, mean_absolute_error: 0.447808, mean_q: -0.527343
  278038/5000000: episode: 4689, duration: 1.173s, episode steps: 26, steps per second: 22, episode reward: -1.000, m

  279829/5000000: episode: 4714, duration: 4.641s, episode steps: 94, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.404 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001336, mean_absolute_error: 0.452951, mean_q: -0.533331
  279860/5000000: episode: 4715, duration: 1.657s, episode steps: 31, steps per second: 19, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.355 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001320, mean_absolute_error: 0.454488, mean_q: -0.535444
  279967/5000000: episode: 4716, duration: 5.217s, episode steps: 107, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.430 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001316, mean_absolute_error: 0.452969, mean_q: -0.533422
  280112/5000000: episode: 4717, duration: 7.049s, episode steps: 145, steps per second: 21, episode reward: -1.000, 

  281873/5000000: episode: 4742, duration: 4.188s, episode steps: 83, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.373 [0.000, 5.000], mean observation: 0.075 [0.000, 24.000], loss: 0.001356, mean_absolute_error: 0.458244, mean_q: -0.539999
  281900/5000000: episode: 4743, duration: 1.293s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001297, mean_absolute_error: 0.454601, mean_q: -0.535507
  281929/5000000: episode: 4744, duration: 1.486s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.207 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001317, mean_absolute_error: 0.456545, mean_q: -0.537560
  281963/5000000: episode: 4745, duration: 1.634s, episode steps: 34, steps per second: 21, episode reward: -1.000, me

  283126/5000000: episode: 4770, duration: 1.344s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.926 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001244, mean_absolute_error: 0.460029, mean_q: -0.542351
  283153/5000000: episode: 4771, duration: 1.271s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.407 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001272, mean_absolute_error: 0.461270, mean_q: -0.543590
  283204/5000000: episode: 4772, duration: 2.402s, episode steps: 51, steps per second: 21, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.137 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001354, mean_absolute_error: 0.461227, mean_q: -0.543904
  283241/5000000: episode: 4773, duration: 1.603s, episode steps: 37, steps per second: 23, episode reward: -1.000, me

  285184/5000000: episode: 4798, duration: 2.716s, episode steps: 58, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.345 [0.000, 5.000], mean observation: 0.075 [0.000, 24.000], loss: 0.001240, mean_absolute_error: 0.462358, mean_q: -0.545488
  285212/5000000: episode: 4799, duration: 1.406s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001297, mean_absolute_error: 0.463720, mean_q: -0.547377
  285237/5000000: episode: 4800, duration: 1.181s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.240 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001356, mean_absolute_error: 0.464981, mean_q: -0.548215
  285296/5000000: episode: 4801, duration: 2.947s, episode steps: 59, steps per second: 20, episode reward: -1.000, me

  286967/5000000: episode: 4826, duration: 4.052s, episode steps: 86, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.477 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001247, mean_absolute_error: 0.468987, mean_q: -0.553777
  287020/5000000: episode: 4827, duration: 2.438s, episode steps: 53, steps per second: 22, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.340 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001268, mean_absolute_error: 0.469684, mean_q: -0.554007
  287071/5000000: episode: 4828, duration: 2.414s, episode steps: 51, steps per second: 21, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001267, mean_absolute_error: 0.469555, mean_q: -0.554532
  287130/5000000: episode: 4829, duration: 2.821s, episode steps: 59, steps per second: 21, episode reward: -1.000, me

  288505/5000000: episode: 4854, duration: 2.002s, episode steps: 40, steps per second: 20, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.900 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001195, mean_absolute_error: 0.465429, mean_q: -0.548795
  288535/5000000: episode: 4855, duration: 1.589s, episode steps: 30, steps per second: 19, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001180, mean_absolute_error: 0.466336, mean_q: -0.550321
  288561/5000000: episode: 4856, duration: 1.233s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.846 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001254, mean_absolute_error: 0.465320, mean_q: -0.548997
  288696/5000000: episode: 4857, duration: 6.549s, episode steps: 135, steps per second: 21, episode reward: -1.000, m

  290518/5000000: episode: 4882, duration: 6.589s, episode steps: 140, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.264 [0.000, 5.000], mean observation: 0.061 [0.000, 24.000], loss: 0.001255, mean_absolute_error: 0.468999, mean_q: -0.553162
  290549/5000000: episode: 4883, duration: 1.548s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.355 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001258, mean_absolute_error: 0.469452, mean_q: -0.554208
  290720/5000000: episode: 4884, duration: 8.236s, episode steps: 171, steps per second: 21, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001286, mean_absolute_error: 0.469145, mean_q: -0.553741
  290848/5000000: episode: 4885, duration: 6.553s, episode steps: 128, steps per second: 20, episode reward: -1.000,

  292519/5000000: episode: 4910, duration: 1.695s, episode steps: 35, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.371 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001305, mean_absolute_error: 0.467975, mean_q: -0.551994
  292658/5000000: episode: 4911, duration: 6.751s, episode steps: 139, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.583 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001213, mean_absolute_error: 0.469575, mean_q: -0.554578
  292774/5000000: episode: 4912, duration: 5.632s, episode steps: 116, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.853 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001177, mean_absolute_error: 0.469123, mean_q: -0.553699
  292957/5000000: episode: 4913, duration: 8.861s, episode steps: 183, steps per second: 21, episode reward: -1.000,

  294575/5000000: episode: 4938, duration: 1.337s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.519 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001207, mean_absolute_error: 0.470842, mean_q: -0.556264
  294633/5000000: episode: 4939, duration: 2.650s, episode steps: 58, steps per second: 22, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.724 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001152, mean_absolute_error: 0.470718, mean_q: -0.556430
  294658/5000000: episode: 4940, duration: 1.229s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.720 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001100, mean_absolute_error: 0.469622, mean_q: -0.555068
  294685/5000000: episode: 4941, duration: 1.355s, episode steps: 27, steps per second: 20, episode reward: -1.000, me

  296438/5000000: episode: 4966, duration: 1.500s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.806 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001193, mean_absolute_error: 0.474346, mean_q: -0.560848
  296470/5000000: episode: 4967, duration: 1.574s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.625 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001189, mean_absolute_error: 0.473610, mean_q: -0.559596
  296527/5000000: episode: 4968, duration: 2.771s, episode steps: 57, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.754 [0.000, 5.000], mean observation: 0.076 [0.000, 24.000], loss: 0.001113, mean_absolute_error: 0.473281, mean_q: -0.559627
  296578/5000000: episode: 4969, duration: 2.492s, episode steps: 51, steps per second: 20, episode reward: -1.000, me

  297945/5000000: episode: 4994, duration: 1.413s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 3.036 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001167, mean_absolute_error: 0.469889, mean_q: -0.554874
  297971/5000000: episode: 4995, duration: 1.232s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.154 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001203, mean_absolute_error: 0.471069, mean_q: -0.556601
  298023/5000000: episode: 4996, duration: 2.477s, episode steps: 52, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.865 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001119, mean_absolute_error: 0.469286, mean_q: -0.554559
  298051/5000000: episode: 4997, duration: 1.294s, episode steps: 28, steps per second: 22, episode reward: -1.000, me

  299632/5000000: episode: 5022, duration: 1.542s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.129 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001095, mean_absolute_error: 0.471631, mean_q: -0.557409
  299659/5000000: episode: 5023, duration: 1.314s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.778 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001126, mean_absolute_error: 0.473685, mean_q: -0.559657
  299721/5000000: episode: 5024, duration: 3.029s, episode steps: 62, steps per second: 20, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.548 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001202, mean_absolute_error: 0.473686, mean_q: -0.559771
  299746/5000000: episode: 5025, duration: 1.308s, episode steps: 25, steps per second: 19, episode reward: -1.000, me

  301369/5000000: episode: 5050, duration: 8.877s, episode steps: 176, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.756 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001151, mean_absolute_error: 0.474078, mean_q: -0.560324
  301395/5000000: episode: 5051, duration: 1.327s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.769 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001162, mean_absolute_error: 0.473493, mean_q: -0.559659
  301422/5000000: episode: 5052, duration: 1.310s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.074 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001133, mean_absolute_error: 0.470829, mean_q: -0.556555
  301448/5000000: episode: 5053, duration: 1.328s, episode steps: 26, steps per second: 20, episode reward: -1.000, m

  302829/5000000: episode: 5078, duration: 1.388s, episode steps: 29, steps per second: 21, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.724 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001169, mean_absolute_error: 0.469255, mean_q: -0.553999
  302883/5000000: episode: 5079, duration: 2.591s, episode steps: 54, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001203, mean_absolute_error: 0.470637, mean_q: -0.556120
  302916/5000000: episode: 5080, duration: 1.616s, episode steps: 33, steps per second: 20, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.515 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001218, mean_absolute_error: 0.470035, mean_q: -0.555289
  302999/5000000: episode: 5081, duration: 4.093s, episode steps: 83, steps per second: 20, episode reward: -1.000, me

  304479/5000000: episode: 5106, duration: 1.340s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.222 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001179, mean_absolute_error: 0.470221, mean_q: -0.555705
  304635/5000000: episode: 5107, duration: 7.636s, episode steps: 156, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.673 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001221, mean_absolute_error: 0.471311, mean_q: -0.556492
  304748/5000000: episode: 5108, duration: 5.013s, episode steps: 113, steps per second: 23, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.442 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001185, mean_absolute_error: 0.469134, mean_q: -0.553884
  304865/5000000: episode: 5109, duration: 5.454s, episode steps: 117, steps per second: 21, episode reward: -1.000,

  306930/5000000: episode: 5134, duration: 2.558s, episode steps: 57, steps per second: 22, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.544 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001326, mean_absolute_error: 0.464293, mean_q: -0.548729
  306981/5000000: episode: 5135, duration: 2.470s, episode steps: 51, steps per second: 21, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001268, mean_absolute_error: 0.463819, mean_q: -0.547800
  307006/5000000: episode: 5136, duration: 1.355s, episode steps: 25, steps per second: 18, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.240 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001222, mean_absolute_error: 0.464565, mean_q: -0.549020
  307046/5000000: episode: 5137, duration: 2.014s, episode steps: 40, steps per second: 20, episode reward: -1.000, me

  308839/5000000: episode: 5162, duration: 1.341s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001265, mean_absolute_error: 0.460863, mean_q: -0.544299
  308976/5000000: episode: 5163, duration: 6.728s, episode steps: 137, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.431 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001246, mean_absolute_error: 0.459622, mean_q: -0.542223
  309021/5000000: episode: 5164, duration: 2.377s, episode steps: 45, steps per second: 19, episode reward: -1.000, mean reward: -0.022 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001206, mean_absolute_error: 0.456956, mean_q: -0.538633
  309052/5000000: episode: 5165, duration: 1.547s, episode steps: 31, steps per second: 20, episode reward: -1.000, m

  310364/5000000: episode: 5190, duration: 9.069s, episode steps: 186, steps per second: 21, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.462 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001261, mean_absolute_error: 0.457237, mean_q: -0.539544
  310393/5000000: episode: 5191, duration: 1.385s, episode steps: 29, steps per second: 21, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.793 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001127, mean_absolute_error: 0.454952, mean_q: -0.537139
  310418/5000000: episode: 5192, duration: 1.138s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.280 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001308, mean_absolute_error: 0.454226, mean_q: -0.536373
  310449/5000000: episode: 5193, duration: 1.396s, episode steps: 31, steps per second: 22, episode reward: -1.000, m

  311919/5000000: episode: 5218, duration: 1.348s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001261, mean_absolute_error: 0.451136, mean_q: -0.532600
  311976/5000000: episode: 5219, duration: 2.780s, episode steps: 57, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.421 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001254, mean_absolute_error: 0.448780, mean_q: -0.528757
  312002/5000000: episode: 5220, duration: 1.394s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.577 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001194, mean_absolute_error: 0.450082, mean_q: -0.530597
  312123/5000000: episode: 5221, duration: 5.685s, episode steps: 121, steps per second: 21, episode reward: -1.000, m

  313266/5000000: episode: 5246, duration: 1.329s, episode steps: 29, steps per second: 22, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.828 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001290, mean_absolute_error: 0.446928, mean_q: -0.527454
  313291/5000000: episode: 5247, duration: 1.179s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 1.960 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001299, mean_absolute_error: 0.445579, mean_q: -0.525433
  313316/5000000: episode: 5248, duration: 1.213s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.120 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001319, mean_absolute_error: 0.444615, mean_q: -0.523747
  313455/5000000: episode: 5249, duration: 6.641s, episode steps: 139, steps per second: 21, episode reward: -1.000, m

  315191/5000000: episode: 5274, duration: 1.240s, episode steps: 28, steps per second: 23, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.286 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001334, mean_absolute_error: 0.437420, mean_q: -0.516018
  315217/5000000: episode: 5275, duration: 1.201s, episode steps: 26, steps per second: 22, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.654 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001324, mean_absolute_error: 0.439894, mean_q: -0.518503
  315482/5000000: episode: 5276, duration: 13.307s, episode steps: 265, steps per second: 20, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.415 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001292, mean_absolute_error: 0.437699, mean_q: -0.515832
  315508/5000000: episode: 5277, duration: 1.354s, episode steps: 26, steps per second: 19, episode reward: -1.000, 

  317199/5000000: episode: 5302, duration: 1.629s, episode steps: 31, steps per second: 19, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.645 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001413, mean_absolute_error: 0.428918, mean_q: -0.504743
  317255/5000000: episode: 5303, duration: 2.717s, episode steps: 56, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.661 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001431, mean_absolute_error: 0.428931, mean_q: -0.504780
  317282/5000000: episode: 5304, duration: 1.329s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.630 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001377, mean_absolute_error: 0.427430, mean_q: -0.502786
  317371/5000000: episode: 5305, duration: 4.249s, episode steps: 89, steps per second: 21, episode reward: -1.000, me

  319069/5000000: episode: 5330, duration: 1.419s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.321 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001387, mean_absolute_error: 0.420663, mean_q: -0.495482
  319101/5000000: episode: 5331, duration: 1.362s, episode steps: 32, steps per second: 23, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.844 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001359, mean_absolute_error: 0.422116, mean_q: -0.496431
  319126/5000000: episode: 5332, duration: 1.294s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.840 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001370, mean_absolute_error: 0.421643, mean_q: -0.495198
  319153/5000000: episode: 5333, duration: 1.299s, episode steps: 27, steps per second: 21, episode reward: -1.000, me

  320803/5000000: episode: 5358, duration: 8.643s, episode steps: 176, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.443 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001519, mean_absolute_error: 0.415800, mean_q: -0.488579
  320834/5000000: episode: 5359, duration: 1.643s, episode steps: 31, steps per second: 19, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.290 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001522, mean_absolute_error: 0.415222, mean_q: -0.487476
  320910/5000000: episode: 5360, duration: 3.607s, episode steps: 76, steps per second: 21, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.421 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001512, mean_absolute_error: 0.415978, mean_q: -0.487835
  320937/5000000: episode: 5361, duration: 1.335s, episode steps: 27, steps per second: 20, episode reward: -1.000, m

  322698/5000000: episode: 5386, duration: 1.511s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.677 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001568, mean_absolute_error: 0.413078, mean_q: -0.484913
  322724/5000000: episode: 5387, duration: 1.272s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001592, mean_absolute_error: 0.414092, mean_q: -0.487081
  322750/5000000: episode: 5388, duration: 1.304s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.346 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001547, mean_absolute_error: 0.413742, mean_q: -0.486584
  322848/5000000: episode: 5389, duration: 4.852s, episode steps: 98, steps per second: 20, episode reward: -1.000, me

  324137/5000000: episode: 5414, duration: 4.089s, episode steps: 86, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.419 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001501, mean_absolute_error: 0.409120, mean_q: -0.480657
  324164/5000000: episode: 5415, duration: 1.218s, episode steps: 27, steps per second: 22, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.741 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001522, mean_absolute_error: 0.407349, mean_q: -0.478494
  324227/5000000: episode: 5416, duration: 2.968s, episode steps: 63, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.508 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001507, mean_absolute_error: 0.408716, mean_q: -0.479262
  324334/5000000: episode: 5417, duration: 5.293s, episode steps: 107, steps per second: 20, episode reward: -1.000, m

  325915/5000000: episode: 5442, duration: 2.727s, episode steps: 55, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.582 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001554, mean_absolute_error: 0.406132, mean_q: -0.476855
  325940/5000000: episode: 5443, duration: 1.235s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001580, mean_absolute_error: 0.405847, mean_q: -0.476258
  325965/5000000: episode: 5444, duration: 1.349s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001670, mean_absolute_error: 0.406239, mean_q: -0.475466
  325991/5000000: episode: 5445, duration: 1.251s, episode steps: 26, steps per second: 21, episode reward: -1.000, me

  327563/5000000: episode: 5470, duration: 2.865s, episode steps: 60, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.076 [0.000, 24.000], loss: 0.001557, mean_absolute_error: 0.403068, mean_q: -0.473474
  327635/5000000: episode: 5471, duration: 3.517s, episode steps: 72, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.431 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001626, mean_absolute_error: 0.404534, mean_q: -0.475079
  327667/5000000: episode: 5472, duration: 1.478s, episode steps: 32, steps per second: 22, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.375 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001531, mean_absolute_error: 0.403015, mean_q: -0.472095
  327706/5000000: episode: 5473, duration: 1.852s, episode steps: 39, steps per second: 21, episode reward: -1.000, me

  329630/5000000: episode: 5498, duration: 1.245s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.160 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001585, mean_absolute_error: 0.400913, mean_q: -0.469978
  329661/5000000: episode: 5499, duration: 1.508s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.645 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001613, mean_absolute_error: 0.398037, mean_q: -0.466828
  329690/5000000: episode: 5500, duration: 1.426s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.690 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001536, mean_absolute_error: 0.398917, mean_q: -0.467940
  329717/5000000: episode: 5501, duration: 1.385s, episode steps: 27, steps per second: 19, episode reward: -1.000, me

  331419/5000000: episode: 5526, duration: 1.639s, episode steps: 33, steps per second: 20, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.273 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001594, mean_absolute_error: 0.398384, mean_q: -0.466736
  331453/5000000: episode: 5527, duration: 1.650s, episode steps: 34, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001578, mean_absolute_error: 0.398212, mean_q: -0.466184
  331479/5000000: episode: 5528, duration: 1.300s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.038 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001522, mean_absolute_error: 0.394862, mean_q: -0.460948
  331505/5000000: episode: 5529, duration: 1.274s, episode steps: 26, steps per second: 20, episode reward: -1.000, me

  333200/5000000: episode: 5554, duration: 2.527s, episode steps: 51, steps per second: 20, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.922 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001585, mean_absolute_error: 0.396987, mean_q: -0.465748
  333228/5000000: episode: 5555, duration: 1.296s, episode steps: 28, steps per second: 22, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.964 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001654, mean_absolute_error: 0.394484, mean_q: -0.462502
  333282/5000000: episode: 5556, duration: 2.695s, episode steps: 54, steps per second: 20, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.259 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001614, mean_absolute_error: 0.395716, mean_q: -0.462114
  333336/5000000: episode: 5557, duration: 2.674s, episode steps: 54, steps per second: 20, episode reward: -1.000, me

  335189/5000000: episode: 5582, duration: 7.455s, episode steps: 157, steps per second: 21, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.420 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001661, mean_absolute_error: 0.389011, mean_q: -0.455394
  335217/5000000: episode: 5583, duration: 1.477s, episode steps: 28, steps per second: 19, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 3.357 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001503, mean_absolute_error: 0.386709, mean_q: -0.451855
  335274/5000000: episode: 5584, duration: 2.845s, episode steps: 57, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.351 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001733, mean_absolute_error: 0.391806, mean_q: -0.459113
  335358/5000000: episode: 5585, duration: 3.707s, episode steps: 84, steps per second: 23, episode reward: -1.000, m

  336804/5000000: episode: 5610, duration: 7.116s, episode steps: 146, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.568 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001625, mean_absolute_error: 0.386918, mean_q: -0.452662
  336839/5000000: episode: 5611, duration: 1.794s, episode steps: 35, steps per second: 20, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.486 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001646, mean_absolute_error: 0.386893, mean_q: -0.453970
  336906/5000000: episode: 5612, duration: 3.214s, episode steps: 67, steps per second: 21, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.224 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001668, mean_absolute_error: 0.388527, mean_q: -0.455349
  336932/5000000: episode: 5613, duration: 1.285s, episode steps: 26, steps per second: 20, episode reward: -1.000, m

  338194/5000000: episode: 5638, duration: 1.267s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.731 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001753, mean_absolute_error: 0.391788, mean_q: -0.457142
  338222/5000000: episode: 5639, duration: 1.487s, episode steps: 28, steps per second: 19, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001694, mean_absolute_error: 0.388588, mean_q: -0.454472
  338250/5000000: episode: 5640, duration: 1.393s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.357 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001558, mean_absolute_error: 0.391871, mean_q: -0.458446
  338277/5000000: episode: 5641, duration: 1.238s, episode steps: 27, steps per second: 22, episode reward: -1.000, me

  339959/5000000: episode: 5666, duration: 4.747s, episode steps: 94, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.521 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001650, mean_absolute_error: 0.390117, mean_q: -0.456510
  339999/5000000: episode: 5667, duration: 1.877s, episode steps: 40, steps per second: 21, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.700 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001605, mean_absolute_error: 0.391212, mean_q: -0.457688
  340029/5000000: episode: 5668, duration: 1.551s, episode steps: 30, steps per second: 19, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.200 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001579, mean_absolute_error: 0.389908, mean_q: -0.456452
  340125/5000000: episode: 5669, duration: 4.865s, episode steps: 96, steps per second: 20, episode reward: -1.000, me

  341428/5000000: episode: 5694, duration: 1.480s, episode steps: 28, steps per second: 19, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001709, mean_absolute_error: 0.393555, mean_q: -0.460226
  341525/5000000: episode: 5695, duration: 4.567s, episode steps: 97, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.814 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001645, mean_absolute_error: 0.388522, mean_q: -0.453934
  341645/5000000: episode: 5696, duration: 5.952s, episode steps: 120, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.442 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001637, mean_absolute_error: 0.389521, mean_q: -0.455369
  341827/5000000: episode: 5697, duration: 8.966s, episode steps: 182, steps per second: 20, episode reward: -1.000, 

  343241/5000000: episode: 5722, duration: 3.444s, episode steps: 72, steps per second: 21, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.389 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001778, mean_absolute_error: 0.390478, mean_q: -0.454939
  343349/5000000: episode: 5723, duration: 5.170s, episode steps: 108, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.648 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001738, mean_absolute_error: 0.390323, mean_q: -0.455502
  343544/5000000: episode: 5724, duration: 9.334s, episode steps: 195, steps per second: 21, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.585 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001712, mean_absolute_error: 0.390034, mean_q: -0.455161
  343641/5000000: episode: 5725, duration: 4.545s, episode steps: 97, steps per second: 21, episode reward: -1.000, 

  345424/5000000: episode: 5750, duration: 3.648s, episode steps: 77, steps per second: 21, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.273 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001776, mean_absolute_error: 0.387583, mean_q: -0.451233
  345580/5000000: episode: 5751, duration: 7.797s, episode steps: 156, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001702, mean_absolute_error: 0.387635, mean_q: -0.452433
  345609/5000000: episode: 5752, duration: 1.537s, episode steps: 29, steps per second: 19, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.276 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001746, mean_absolute_error: 0.387213, mean_q: -0.451493
  345638/5000000: episode: 5753, duration: 1.325s, episode steps: 29, steps per second: 22, episode reward: -1.000, m

  347503/5000000: episode: 5778, duration: 3.515s, episode steps: 71, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.183 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001755, mean_absolute_error: 0.381730, mean_q: -0.444136
  347536/5000000: episode: 5779, duration: 1.554s, episode steps: 33, steps per second: 21, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.515 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001713, mean_absolute_error: 0.381572, mean_q: -0.444612
  347659/5000000: episode: 5780, duration: 6.039s, episode steps: 123, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.537 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001755, mean_absolute_error: 0.382860, mean_q: -0.444892
  347686/5000000: episode: 5781, duration: 1.277s, episode steps: 27, steps per second: 21, episode reward: -1.000, m

  349292/5000000: episode: 5806, duration: 5.536s, episode steps: 112, steps per second: 20, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.527 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001853, mean_absolute_error: 0.381924, mean_q: -0.444981
  349332/5000000: episode: 5807, duration: 2.026s, episode steps: 40, steps per second: 20, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.575 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001729, mean_absolute_error: 0.383180, mean_q: -0.446765
  349477/5000000: episode: 5808, duration: 7.238s, episode steps: 145, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.469 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001771, mean_absolute_error: 0.381730, mean_q: -0.444248
  349502/5000000: episode: 5809, duration: 1.158s, episode steps: 25, steps per second: 22, episode reward: -1.000, 

  350693/5000000: episode: 5834, duration: 1.847s, episode steps: 39, steps per second: 21, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001801, mean_absolute_error: 0.380934, mean_q: -0.443917
  350817/5000000: episode: 5835, duration: 6.162s, episode steps: 124, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.484 [0.000, 5.000], mean observation: 0.075 [0.000, 24.000], loss: 0.001813, mean_absolute_error: 0.384053, mean_q: -0.447454
  350870/5000000: episode: 5836, duration: 2.581s, episode steps: 53, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.019 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001778, mean_absolute_error: 0.382757, mean_q: -0.445727
  350976/5000000: episode: 5837, duration: 5.280s, episode steps: 106, steps per second: 20, episode reward: -1.000, 

  352367/5000000: episode: 5862, duration: 1.235s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.600 [1.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001855, mean_absolute_error: 0.384706, mean_q: -0.447831
  352509/5000000: episode: 5863, duration: 7.061s, episode steps: 142, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.725 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001753, mean_absolute_error: 0.384395, mean_q: -0.447338
  352560/5000000: episode: 5864, duration: 2.444s, episode steps: 51, steps per second: 21, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.176 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001709, mean_absolute_error: 0.384413, mean_q: -0.447323
  352606/5000000: episode: 5865, duration: 2.072s, episode steps: 46, steps per second: 22, episode reward: -1.000, m

  353925/5000000: episode: 5890, duration: 1.324s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.741 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001639, mean_absolute_error: 0.388390, mean_q: -0.452389
  353962/5000000: episode: 5891, duration: 1.832s, episode steps: 37, steps per second: 20, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 1.919 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001769, mean_absolute_error: 0.389376, mean_q: -0.453702
  353989/5000000: episode: 5892, duration: 1.294s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.704 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001716, mean_absolute_error: 0.388361, mean_q: -0.451039
  354075/5000000: episode: 5893, duration: 4.154s, episode steps: 86, steps per second: 21, episode reward: -1.000, me

  355612/5000000: episode: 5918, duration: 2.855s, episode steps: 57, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001767, mean_absolute_error: 0.396579, mean_q: -0.462482
  355739/5000000: episode: 5919, duration: 6.194s, episode steps: 127, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.378 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001738, mean_absolute_error: 0.397201, mean_q: -0.463542
  355765/5000000: episode: 5920, duration: 1.375s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.346 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001648, mean_absolute_error: 0.396870, mean_q: -0.462364
  355829/5000000: episode: 5921, duration: 3.160s, episode steps: 64, steps per second: 20, episode reward: -1.000, m

  357257/5000000: episode: 5946, duration: 3.193s, episode steps: 71, steps per second: 22, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.465 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001708, mean_absolute_error: 0.399254, mean_q: -0.465683
  357351/5000000: episode: 5947, duration: 4.513s, episode steps: 94, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.734 [0.000, 5.000], mean observation: 0.061 [0.000, 24.000], loss: 0.001786, mean_absolute_error: 0.401150, mean_q: -0.467788
  357382/5000000: episode: 5948, duration: 1.552s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.677 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001792, mean_absolute_error: 0.401191, mean_q: -0.468580
  357411/5000000: episode: 5949, duration: 1.397s, episode steps: 29, steps per second: 21, episode reward: -1.000, me

  358690/5000000: episode: 5974, duration: 1.370s, episode steps: 29, steps per second: 21, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.552 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001730, mean_absolute_error: 0.403131, mean_q: -0.471047
  358779/5000000: episode: 5975, duration: 4.445s, episode steps: 89, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.596 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001797, mean_absolute_error: 0.402601, mean_q: -0.470734
  358804/5000000: episode: 5976, duration: 1.192s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.440 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001650, mean_absolute_error: 0.404516, mean_q: -0.471829
  358921/5000000: episode: 5977, duration: 5.523s, episode steps: 117, steps per second: 21, episode reward: -1.000, m

  361061/5000000: episode: 6002, duration: 2.685s, episode steps: 57, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001804, mean_absolute_error: 0.399991, mean_q: -0.467251
  361280/5000000: episode: 6003, duration: 10.164s, episode steps: 219, steps per second: 22, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.315 [0.000, 5.000], mean observation: 0.061 [0.000, 24.000], loss: 0.001761, mean_absolute_error: 0.400163, mean_q: -0.467012
  361305/5000000: episode: 6004, duration: 1.233s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001678, mean_absolute_error: 0.403698, mean_q: -0.471924
  361392/5000000: episode: 6005, duration: 4.254s, episode steps: 87, steps per second: 20, episode reward: -1.000, 

  362806/5000000: episode: 6030, duration: 1.492s, episode steps: 29, steps per second: 19, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.276 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001820, mean_absolute_error: 0.396413, mean_q: -0.462707
  362833/5000000: episode: 6031, duration: 1.372s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 1.889 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001726, mean_absolute_error: 0.395217, mean_q: -0.461497
  362916/5000000: episode: 6032, duration: 4.084s, episode steps: 83, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.711 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.001661, mean_absolute_error: 0.396966, mean_q: -0.463413
  362944/5000000: episode: 6033, duration: 1.462s, episode steps: 28, steps per second: 19, episode reward: -1.000, me

  364778/5000000: episode: 6058, duration: 1.129s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.320 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001725, mean_absolute_error: 0.399167, mean_q: -0.467214
  364812/5000000: episode: 6059, duration: 1.751s, episode steps: 34, steps per second: 19, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.029 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001707, mean_absolute_error: 0.401349, mean_q: -0.469772
  364898/5000000: episode: 6060, duration: 4.101s, episode steps: 86, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.767 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001716, mean_absolute_error: 0.401712, mean_q: -0.469871
  365040/5000000: episode: 6061, duration: 6.830s, episode steps: 142, steps per second: 21, episode reward: -1.000, m

  366885/5000000: episode: 6086, duration: 1.594s, episode steps: 35, steps per second: 22, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.543 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001704, mean_absolute_error: 0.404017, mean_q: -0.472489
  366973/5000000: episode: 6087, duration: 4.241s, episode steps: 88, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.886 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001700, mean_absolute_error: 0.402772, mean_q: -0.471275
  366998/5000000: episode: 6088, duration: 1.150s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.400 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001697, mean_absolute_error: 0.402495, mean_q: -0.470957
  367054/5000000: episode: 6089, duration: 2.696s, episode steps: 56, steps per second: 21, episode reward: -1.000, me

  368715/5000000: episode: 6114, duration: 1.294s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.630 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001685, mean_absolute_error: 0.403719, mean_q: -0.472205
  368795/5000000: episode: 6115, duration: 3.984s, episode steps: 80, steps per second: 20, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.612 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001682, mean_absolute_error: 0.404656, mean_q: -0.474295
  368851/5000000: episode: 6116, duration: 2.825s, episode steps: 56, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.732 [0.000, 5.000], mean observation: 0.077 [0.000, 24.000], loss: 0.001525, mean_absolute_error: 0.406032, mean_q: -0.475937
  368918/5000000: episode: 6117, duration: 3.208s, episode steps: 67, steps per second: 21, episode reward: -1.000, me

  370711/5000000: episode: 6142, duration: 2.435s, episode steps: 52, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.808 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001620, mean_absolute_error: 0.402724, mean_q: -0.470459
  370743/5000000: episode: 6143, duration: 1.613s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.812 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001495, mean_absolute_error: 0.400097, mean_q: -0.467887
  370781/5000000: episode: 6144, duration: 1.868s, episode steps: 38, steps per second: 20, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.368 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001689, mean_absolute_error: 0.401588, mean_q: -0.469040
  370809/5000000: episode: 6145, duration: 1.347s, episode steps: 28, steps per second: 21, episode reward: -1.000, me

  372627/5000000: episode: 6170, duration: 1.139s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.320 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001632, mean_absolute_error: 0.402433, mean_q: -0.471123
  372746/5000000: episode: 6171, duration: 5.744s, episode steps: 119, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.462 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001654, mean_absolute_error: 0.399349, mean_q: -0.466750
  372773/5000000: episode: 6172, duration: 1.358s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.037 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001773, mean_absolute_error: 0.401364, mean_q: -0.469012
  372802/5000000: episode: 6173, duration: 1.431s, episode steps: 29, steps per second: 20, episode reward: -1.000, m

  374613/5000000: episode: 6198, duration: 4.389s, episode steps: 91, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.198 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001695, mean_absolute_error: 0.396612, mean_q: -0.462699
  374728/5000000: episode: 6199, duration: 5.471s, episode steps: 115, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.557 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001658, mean_absolute_error: 0.396579, mean_q: -0.462870
  374754/5000000: episode: 6200, duration: 1.263s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 1.962 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001751, mean_absolute_error: 0.396691, mean_q: -0.462725
  374842/5000000: episode: 6201, duration: 4.351s, episode steps: 88, steps per second: 20, episode reward: -1.000, m

  376360/5000000: episode: 6226, duration: 1.893s, episode steps: 39, steps per second: 21, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.179 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001796, mean_absolute_error: 0.400713, mean_q: -0.468765
  376388/5000000: episode: 6227, duration: 1.400s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.321 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001741, mean_absolute_error: 0.399225, mean_q: -0.465398
  376509/5000000: episode: 6228, duration: 5.816s, episode steps: 121, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.463 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001669, mean_absolute_error: 0.399577, mean_q: -0.467271
  376591/5000000: episode: 6229, duration: 4.012s, episode steps: 82, steps per second: 20, episode reward: -1.000, m

  378134/5000000: episode: 6254, duration: 1.564s, episode steps: 34, steps per second: 22, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.559 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001678, mean_absolute_error: 0.396778, mean_q: -0.462245
  378202/5000000: episode: 6255, duration: 3.208s, episode steps: 68, steps per second: 21, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.618 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001693, mean_absolute_error: 0.395636, mean_q: -0.461478
  378237/5000000: episode: 6256, duration: 1.829s, episode steps: 35, steps per second: 19, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.314 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001617, mean_absolute_error: 0.395264, mean_q: -0.459675
  378279/5000000: episode: 6257, duration: 2.059s, episode steps: 42, steps per second: 20, episode reward: -1.000, me

  379243/5000000: episode: 6282, duration: 1.327s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.143 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001595, mean_absolute_error: 0.399857, mean_q: -0.468413
  379387/5000000: episode: 6283, duration: 7.012s, episode steps: 144, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.590 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001720, mean_absolute_error: 0.398663, mean_q: -0.465889
  379484/5000000: episode: 6284, duration: 4.533s, episode steps: 97, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.464 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001594, mean_absolute_error: 0.398407, mean_q: -0.465968
  379509/5000000: episode: 6285, duration: 1.271s, episode steps: 25, steps per second: 20, episode reward: -1.000, m

  381160/5000000: episode: 6310, duration: 1.662s, episode steps: 33, steps per second: 20, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.061 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001604, mean_absolute_error: 0.402510, mean_q: -0.470020
  381195/5000000: episode: 6311, duration: 1.593s, episode steps: 35, steps per second: 22, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.657 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001725, mean_absolute_error: 0.403158, mean_q: -0.471893
  381297/5000000: episode: 6312, duration: 4.532s, episode steps: 102, steps per second: 23, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.402 [0.000, 5.000], mean observation: 0.060 [0.000, 24.000], loss: 0.001636, mean_absolute_error: 0.402464, mean_q: -0.470704
  381376/5000000: episode: 6313, duration: 3.927s, episode steps: 79, steps per second: 20, episode reward: -1.000, m

  382599/5000000: episode: 6338, duration: 1.319s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001596, mean_absolute_error: 0.404291, mean_q: -0.471718
  382639/5000000: episode: 6339, duration: 2.005s, episode steps: 40, steps per second: 20, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.150 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001555, mean_absolute_error: 0.403896, mean_q: -0.472011
  382670/5000000: episode: 6340, duration: 1.513s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.226 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001622, mean_absolute_error: 0.405686, mean_q: -0.474765
  382741/5000000: episode: 6341, duration: 3.337s, episode steps: 71, steps per second: 21, episode reward: -1.000, me

  383983/5000000: episode: 6366, duration: 1.975s, episode steps: 42, steps per second: 21, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.310 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001461, mean_absolute_error: 0.406742, mean_q: -0.476219
  384034/5000000: episode: 6367, duration: 2.320s, episode steps: 51, steps per second: 22, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.588 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001621, mean_absolute_error: 0.410170, mean_q: -0.480732
  384089/5000000: episode: 6368, duration: 2.767s, episode steps: 55, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.455 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001564, mean_absolute_error: 0.409997, mean_q: -0.480290
  384117/5000000: episode: 6369, duration: 1.340s, episode steps: 28, steps per second: 21, episode reward: -1.000, me

  385611/5000000: episode: 6394, duration: 2.472s, episode steps: 52, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.615 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001577, mean_absolute_error: 0.407273, mean_q: -0.476323
  385664/5000000: episode: 6395, duration: 2.521s, episode steps: 53, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.679 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001564, mean_absolute_error: 0.408395, mean_q: -0.477569
  385749/5000000: episode: 6396, duration: 4.252s, episode steps: 85, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.824 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001530, mean_absolute_error: 0.406247, mean_q: -0.474588
  385868/5000000: episode: 6397, duration: 5.525s, episode steps: 119, steps per second: 22, episode reward: -1.000, m

  387333/5000000: episode: 6422, duration: 3.024s, episode steps: 62, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.065 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001589, mean_absolute_error: 0.406398, mean_q: -0.476131
  387363/5000000: episode: 6423, duration: 1.520s, episode steps: 30, steps per second: 20, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.433 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001650, mean_absolute_error: 0.406193, mean_q: -0.474730
  387389/5000000: episode: 6424, duration: 1.266s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001470, mean_absolute_error: 0.402585, mean_q: -0.470238
  387414/5000000: episode: 6425, duration: 1.185s, episode steps: 25, steps per second: 21, episode reward: -1.000, me

  388874/5000000: episode: 6450, duration: 8.704s, episode steps: 181, steps per second: 21, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.365 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001523, mean_absolute_error: 0.404548, mean_q: -0.472757
  388902/5000000: episode: 6451, duration: 1.416s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.536 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001666, mean_absolute_error: 0.405567, mean_q: -0.474557
  389018/5000000: episode: 6452, duration: 5.189s, episode steps: 116, steps per second: 22, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.379 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001583, mean_absolute_error: 0.404275, mean_q: -0.472458
  389115/5000000: episode: 6453, duration: 4.698s, episode steps: 97, steps per second: 21, episode reward: -1.000, 

  390662/5000000: episode: 6478, duration: 1.356s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.630 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001696, mean_absolute_error: 0.398726, mean_q: -0.464970
  390687/5000000: episode: 6479, duration: 1.259s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001594, mean_absolute_error: 0.401331, mean_q: -0.467867
  390756/5000000: episode: 6480, duration: 3.402s, episode steps: 69, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.174 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001652, mean_absolute_error: 0.402672, mean_q: -0.470315
  390788/5000000: episode: 6481, duration: 1.600s, episode steps: 32, steps per second: 20, episode reward: -1.000, me

  392300/5000000: episode: 6506, duration: 1.426s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 3.138 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001657, mean_absolute_error: 0.394973, mean_q: -0.460539
  392356/5000000: episode: 6507, duration: 2.665s, episode steps: 56, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.607 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001641, mean_absolute_error: 0.394692, mean_q: -0.460251
  392384/5000000: episode: 6508, duration: 1.267s, episode steps: 28, steps per second: 22, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.893 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001594, mean_absolute_error: 0.393344, mean_q: -0.458183
  392445/5000000: episode: 6509, duration: 2.982s, episode steps: 61, steps per second: 20, episode reward: -1.000, me

  394029/5000000: episode: 6534, duration: 1.820s, episode steps: 38, steps per second: 21, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.605 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001717, mean_absolute_error: 0.393242, mean_q: -0.458658
  394088/5000000: episode: 6535, duration: 2.854s, episode steps: 59, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.576 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001756, mean_absolute_error: 0.395313, mean_q: -0.461186
  394114/5000000: episode: 6536, duration: 1.395s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001668, mean_absolute_error: 0.392096, mean_q: -0.456935
  394140/5000000: episode: 6537, duration: 1.274s, episode steps: 26, steps per second: 20, episode reward: -1.000, me

  395305/5000000: episode: 6562, duration: 1.425s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.207 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001701, mean_absolute_error: 0.394821, mean_q: -0.459052
  395497/5000000: episode: 6563, duration: 9.247s, episode steps: 192, steps per second: 21, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.547 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001759, mean_absolute_error: 0.392979, mean_q: -0.457231
  395550/5000000: episode: 6564, duration: 2.416s, episode steps: 53, steps per second: 22, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.547 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001695, mean_absolute_error: 0.395202, mean_q: -0.460027
  395607/5000000: episode: 6565, duration: 2.733s, episode steps: 57, steps per second: 21, episode reward: -1.000, m

  397364/5000000: episode: 6590, duration: 8.667s, episode steps: 173, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.376 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001801, mean_absolute_error: 0.390046, mean_q: -0.452975
  397396/5000000: episode: 6591, duration: 1.546s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.438 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001745, mean_absolute_error: 0.389887, mean_q: -0.452990
  397425/5000000: episode: 6592, duration: 1.398s, episode steps: 29, steps per second: 21, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.345 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001781, mean_absolute_error: 0.391619, mean_q: -0.455425
  397453/5000000: episode: 6593, duration: 1.376s, episode steps: 28, steps per second: 20, episode reward: -1.000, m

  399189/5000000: episode: 6618, duration: 1.280s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.519 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001735, mean_absolute_error: 0.382342, mean_q: -0.443761
  399235/5000000: episode: 6619, duration: 2.193s, episode steps: 46, steps per second: 21, episode reward: -1.000, mean reward: -0.022 [-1.000, 0.000], mean action: 3.130 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001792, mean_absolute_error: 0.384746, mean_q: -0.447070
  399272/5000000: episode: 6620, duration: 1.845s, episode steps: 37, steps per second: 20, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.216 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001778, mean_absolute_error: 0.384460, mean_q: -0.445535
  399302/5000000: episode: 6621, duration: 1.591s, episode steps: 30, steps per second: 19, episode reward: -1.000, me

  400791/5000000: episode: 6646, duration: 5.989s, episode steps: 124, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.556 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001782, mean_absolute_error: 0.379656, mean_q: -0.438423
  400855/5000000: episode: 6647, duration: 3.188s, episode steps: 64, steps per second: 20, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.266 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001766, mean_absolute_error: 0.378411, mean_q: -0.436356
  400880/5000000: episode: 6648, duration: 1.227s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001812, mean_absolute_error: 0.377731, mean_q: -0.435880
  400905/5000000: episode: 6649, duration: 1.131s, episode steps: 25, steps per second: 22, episode reward: -1.000, m

  402139/5000000: episode: 6674, duration: 10.156s, episode steps: 216, steps per second: 21, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.412 [0.000, 5.000], mean observation: 0.060 [0.000, 24.000], loss: 0.001848, mean_absolute_error: 0.376544, mean_q: -0.434241
  402226/5000000: episode: 6675, duration: 4.159s, episode steps: 87, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001746, mean_absolute_error: 0.375445, mean_q: -0.433150
  402262/5000000: episode: 6676, duration: 1.750s, episode steps: 36, steps per second: 21, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001779, mean_absolute_error: 0.375175, mean_q: -0.432830
  402292/5000000: episode: 6677, duration: 1.340s, episode steps: 30, steps per second: 22, episode reward: -1.000, 

  404214/5000000: episode: 6702, duration: 2.932s, episode steps: 57, steps per second: 19, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.211 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001872, mean_absolute_error: 0.371295, mean_q: -0.428033
  404279/5000000: episode: 6703, duration: 3.087s, episode steps: 65, steps per second: 21, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.569 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001924, mean_absolute_error: 0.372209, mean_q: -0.428410
  404311/5000000: episode: 6704, duration: 1.493s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.438 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001849, mean_absolute_error: 0.370593, mean_q: -0.425355
  404372/5000000: episode: 6705, duration: 2.892s, episode steps: 61, steps per second: 21, episode reward: -1.000, me

  406017/5000000: episode: 6730, duration: 2.606s, episode steps: 54, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 3.037 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001903, mean_absolute_error: 0.370403, mean_q: -0.424650
  406046/5000000: episode: 6731, duration: 1.446s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 3.552 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001964, mean_absolute_error: 0.369750, mean_q: -0.423874
  406077/5000000: episode: 6732, duration: 1.468s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.194 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001931, mean_absolute_error: 0.370413, mean_q: -0.425978
  406104/5000000: episode: 6733, duration: 1.315s, episode steps: 27, steps per second: 21, episode reward: -1.000, me

  407360/5000000: episode: 6758, duration: 2.816s, episode steps: 60, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.083 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.002051, mean_absolute_error: 0.369104, mean_q: -0.423329
  407393/5000000: episode: 6759, duration: 1.619s, episode steps: 33, steps per second: 20, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.545 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001964, mean_absolute_error: 0.366840, mean_q: -0.419850
  407469/5000000: episode: 6760, duration: 3.669s, episode steps: 76, steps per second: 21, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.658 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001974, mean_absolute_error: 0.366875, mean_q: -0.419716
  407529/5000000: episode: 6761, duration: 3.000s, episode steps: 60, steps per second: 20, episode reward: -1.000, me

  408933/5000000: episode: 6786, duration: 1.765s, episode steps: 35, steps per second: 20, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.200 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002024, mean_absolute_error: 0.366870, mean_q: -0.419699
  408958/5000000: episode: 6787, duration: 1.223s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.200 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002052, mean_absolute_error: 0.365440, mean_q: -0.418909
  408983/5000000: episode: 6788, duration: 1.279s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.320 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002073, mean_absolute_error: 0.365250, mean_q: -0.417425
  409008/5000000: episode: 6789, duration: 1.227s, episode steps: 25, steps per second: 20, episode reward: -1.000, me

  410738/5000000: episode: 6814, duration: 4.654s, episode steps: 101, steps per second: 22, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.545 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.002063, mean_absolute_error: 0.363386, mean_q: -0.415289
  410806/5000000: episode: 6815, duration: 3.353s, episode steps: 68, steps per second: 20, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.632 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002043, mean_absolute_error: 0.363806, mean_q: -0.415486
  410864/5000000: episode: 6816, duration: 2.854s, episode steps: 58, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.586 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002087, mean_absolute_error: 0.363085, mean_q: -0.414746
  410889/5000000: episode: 6817, duration: 1.304s, episode steps: 25, steps per second: 19, episode reward: -1.000, m

  412311/5000000: episode: 6842, duration: 2.903s, episode steps: 57, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.002159, mean_absolute_error: 0.357033, mean_q: -0.404752
  412392/5000000: episode: 6843, duration: 4.066s, episode steps: 81, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.605 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.002110, mean_absolute_error: 0.357203, mean_q: -0.405857
  412417/5000000: episode: 6844, duration: 1.122s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.520 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002234, mean_absolute_error: 0.360913, mean_q: -0.408934
  412446/5000000: episode: 6845, duration: 1.333s, episode steps: 29, steps per second: 22, episode reward: -1.000, me

  414245/5000000: episode: 6870, duration: 1.587s, episode steps: 30, steps per second: 19, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002337, mean_absolute_error: 0.361453, mean_q: -0.409719
  414344/5000000: episode: 6871, duration: 4.983s, episode steps: 99, steps per second: 20, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.404 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002111, mean_absolute_error: 0.358285, mean_q: -0.406330
  414458/5000000: episode: 6872, duration: 5.752s, episode steps: 114, steps per second: 20, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.842 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002150, mean_absolute_error: 0.359625, mean_q: -0.407872
  414585/5000000: episode: 6873, duration: 6.456s, episode steps: 127, steps per second: 20, episode reward: -1.000, 

  416510/5000000: episode: 6898, duration: 2.740s, episode steps: 56, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.429 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.002329, mean_absolute_error: 0.350379, mean_q: -0.395725
  416537/5000000: episode: 6899, duration: 1.243s, episode steps: 27, steps per second: 22, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002239, mean_absolute_error: 0.351957, mean_q: -0.397670
  416565/5000000: episode: 6900, duration: 1.354s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.857 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.002318, mean_absolute_error: 0.351170, mean_q: -0.396783
  416590/5000000: episode: 6901, duration: 1.190s, episode steps: 25, steps per second: 21, episode reward: -1.000, me

  418211/5000000: episode: 6926, duration: 1.612s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.062 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002069, mean_absolute_error: 0.351408, mean_q: -0.396402
  418246/5000000: episode: 6927, duration: 1.717s, episode steps: 35, steps per second: 20, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.657 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002286, mean_absolute_error: 0.348693, mean_q: -0.393587
  418272/5000000: episode: 6928, duration: 1.349s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.231 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002298, mean_absolute_error: 0.353032, mean_q: -0.399520
  418339/5000000: episode: 6929, duration: 3.387s, episode steps: 67, steps per second: 20, episode reward: -1.000, me

  419790/5000000: episode: 6954, duration: 3.465s, episode steps: 66, steps per second: 19, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.924 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002320, mean_absolute_error: 0.350321, mean_q: -0.395284
  419816/5000000: episode: 6955, duration: 1.258s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.038 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002153, mean_absolute_error: 0.353247, mean_q: -0.397974
  419874/5000000: episode: 6956, duration: 2.936s, episode steps: 58, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.948 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002135, mean_absolute_error: 0.350474, mean_q: -0.394985
  419903/5000000: episode: 6957, duration: 1.450s, episode steps: 29, steps per second: 20, episode reward: -1.000, me

  421766/5000000: episode: 6982, duration: 1.342s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.154 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002270, mean_absolute_error: 0.352506, mean_q: -0.398034
  421794/5000000: episode: 6983, duration: 1.356s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.250 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002353, mean_absolute_error: 0.352690, mean_q: -0.398420
  421851/5000000: episode: 6984, duration: 2.623s, episode steps: 57, steps per second: 22, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.526 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.002413, mean_absolute_error: 0.354028, mean_q: -0.399325
  421882/5000000: episode: 6985, duration: 1.467s, episode steps: 31, steps per second: 21, episode reward: -1.000, me

  423129/5000000: episode: 7010, duration: 2.471s, episode steps: 55, steps per second: 22, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.455 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.002277, mean_absolute_error: 0.358603, mean_q: -0.403898
  423187/5000000: episode: 7011, duration: 2.843s, episode steps: 58, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.483 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.002328, mean_absolute_error: 0.358426, mean_q: -0.403925
  423215/5000000: episode: 7012, duration: 1.346s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.179 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002085, mean_absolute_error: 0.357938, mean_q: -0.405212
  423240/5000000: episode: 7013, duration: 1.212s, episode steps: 25, steps per second: 21, episode reward: -1.000, me

  424427/5000000: episode: 7038, duration: 7.658s, episode steps: 157, steps per second: 21, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.452 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.002220, mean_absolute_error: 0.362804, mean_q: -0.410338
  424511/5000000: episode: 7039, duration: 4.118s, episode steps: 84, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.738 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002267, mean_absolute_error: 0.363750, mean_q: -0.411313
  424605/5000000: episode: 7040, duration: 4.232s, episode steps: 94, steps per second: 22, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.585 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.002190, mean_absolute_error: 0.363021, mean_q: -0.410078
  424659/5000000: episode: 7041, duration: 2.609s, episode steps: 54, steps per second: 21, episode reward: -1.000, m

  426490/5000000: episode: 7066, duration: 4.371s, episode steps: 88, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.045 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.002213, mean_absolute_error: 0.369291, mean_q: -0.419474
  426664/5000000: episode: 7067, duration: 8.095s, episode steps: 174, steps per second: 21, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.534 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.002256, mean_absolute_error: 0.370000, mean_q: -0.419345
  426695/5000000: episode: 7068, duration: 1.607s, episode steps: 31, steps per second: 19, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.355 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.002237, mean_absolute_error: 0.371415, mean_q: -0.422881
  426760/5000000: episode: 7069, duration: 3.044s, episode steps: 65, steps per second: 21, episode reward: -1.000, m

  428283/5000000: episode: 7094, duration: 2.929s, episode steps: 60, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.567 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002211, mean_absolute_error: 0.370467, mean_q: -0.421704
  428379/5000000: episode: 7095, duration: 4.551s, episode steps: 96, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.615 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.002214, mean_absolute_error: 0.369059, mean_q: -0.419426
  428406/5000000: episode: 7096, duration: 1.307s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.222 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002093, mean_absolute_error: 0.369041, mean_q: -0.420266
  428503/5000000: episode: 7097, duration: 4.592s, episode steps: 97, steps per second: 21, episode reward: -1.000, me

  429786/5000000: episode: 7122, duration: 1.350s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.714 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002235, mean_absolute_error: 0.369150, mean_q: -0.420133
  429848/5000000: episode: 7123, duration: 3.108s, episode steps: 62, steps per second: 20, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.613 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.002115, mean_absolute_error: 0.372007, mean_q: -0.422847
  429875/5000000: episode: 7124, duration: 1.466s, episode steps: 27, steps per second: 18, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.296 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002157, mean_absolute_error: 0.371718, mean_q: -0.423671
  429959/5000000: episode: 7125, duration: 3.965s, episode steps: 84, steps per second: 21, episode reward: -1.000, me

  431452/5000000: episode: 7150, duration: 2.888s, episode steps: 61, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.607 [0.000, 5.000], mean observation: 0.061 [0.000, 24.000], loss: 0.002148, mean_absolute_error: 0.373440, mean_q: -0.426540
  431554/5000000: episode: 7151, duration: 5.049s, episode steps: 102, steps per second: 20, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.461 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.002159, mean_absolute_error: 0.373384, mean_q: -0.425226
  431589/5000000: episode: 7152, duration: 1.737s, episode steps: 35, steps per second: 20, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.429 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.002115, mean_absolute_error: 0.372875, mean_q: -0.426007
  431619/5000000: episode: 7153, duration: 1.399s, episode steps: 30, steps per second: 21, episode reward: -1.000, m

  432818/5000000: episode: 7178, duration: 2.641s, episode steps: 54, steps per second: 20, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.852 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001944, mean_absolute_error: 0.375393, mean_q: -0.429006
  432903/5000000: episode: 7179, duration: 4.164s, episode steps: 85, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.435 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001998, mean_absolute_error: 0.375532, mean_q: -0.429623
  432943/5000000: episode: 7180, duration: 2.032s, episode steps: 40, steps per second: 20, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 1.700 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.002057, mean_absolute_error: 0.378475, mean_q: -0.433078
  433031/5000000: episode: 7181, duration: 4.005s, episode steps: 88, steps per second: 22, episode reward: -1.000, me

  434601/5000000: episode: 7206, duration: 3.839s, episode steps: 78, steps per second: 20, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.397 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001869, mean_absolute_error: 0.377817, mean_q: -0.432341
  434636/5000000: episode: 7207, duration: 1.590s, episode steps: 35, steps per second: 22, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.743 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001931, mean_absolute_error: 0.379421, mean_q: -0.433161
  434795/5000000: episode: 7208, duration: 7.654s, episode steps: 159, steps per second: 21, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.660 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002021, mean_absolute_error: 0.378318, mean_q: -0.433087
  434822/5000000: episode: 7209, duration: 1.409s, episode steps: 27, steps per second: 19, episode reward: -1.000, m

  436224/5000000: episode: 7234, duration: 1.596s, episode steps: 34, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.618 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001896, mean_absolute_error: 0.379075, mean_q: -0.433348
  436250/5000000: episode: 7235, duration: 1.314s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.769 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002066, mean_absolute_error: 0.377004, mean_q: -0.430267
  436311/5000000: episode: 7236, duration: 2.897s, episode steps: 61, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.525 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.001943, mean_absolute_error: 0.378848, mean_q: -0.433370
  436349/5000000: episode: 7237, duration: 1.968s, episode steps: 38, steps per second: 19, episode reward: -1.000, me

  437922/5000000: episode: 7262, duration: 2.717s, episode steps: 56, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.964 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.002031, mean_absolute_error: 0.376104, mean_q: -0.430757
  437952/5000000: episode: 7263, duration: 1.499s, episode steps: 30, steps per second: 20, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.833 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001928, mean_absolute_error: 0.375429, mean_q: -0.428716
  437977/5000000: episode: 7264, duration: 1.289s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.520 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002081, mean_absolute_error: 0.376327, mean_q: -0.430405
  438035/5000000: episode: 7265, duration: 2.853s, episode steps: 58, steps per second: 20, episode reward: -1.000, me

  439614/5000000: episode: 7290, duration: 1.370s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.222 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002073, mean_absolute_error: 0.367735, mean_q: -0.419148
  439640/5000000: episode: 7291, duration: 1.333s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.577 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002006, mean_absolute_error: 0.366505, mean_q: -0.417408
  439668/5000000: episode: 7292, duration: 1.365s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.643 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002026, mean_absolute_error: 0.367712, mean_q: -0.419021
  439698/5000000: episode: 7293, duration: 1.399s, episode steps: 30, steps per second: 21, episode reward: -1.000, me

  441093/5000000: episode: 7318, duration: 2.924s, episode steps: 63, steps per second: 22, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.206 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.002156, mean_absolute_error: 0.363257, mean_q: -0.412935
  441119/5000000: episode: 7319, duration: 1.248s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.885 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002154, mean_absolute_error: 0.363508, mean_q: -0.413063
  441171/5000000: episode: 7320, duration: 2.569s, episode steps: 52, steps per second: 20, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.442 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002034, mean_absolute_error: 0.361710, mean_q: -0.412652
  441203/5000000: episode: 7321, duration: 1.581s, episode steps: 32, steps per second: 20, episode reward: -1.000, me

  442860/5000000: episode: 7346, duration: 2.500s, episode steps: 56, steps per second: 22, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.750 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.002004, mean_absolute_error: 0.356052, mean_q: -0.403527
  442885/5000000: episode: 7347, duration: 1.292s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.920 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001932, mean_absolute_error: 0.356151, mean_q: -0.405876
  442928/5000000: episode: 7348, duration: 2.170s, episode steps: 43, steps per second: 20, episode reward: -1.000, mean reward: -0.023 [-1.000, 0.000], mean action: 2.814 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002190, mean_absolute_error: 0.358553, mean_q: -0.408451
  442980/5000000: episode: 7349, duration: 2.428s, episode steps: 52, steps per second: 21, episode reward: -1.000, me

  444107/5000000: episode: 7374, duration: 1.373s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002060, mean_absolute_error: 0.354499, mean_q: -0.403951
  444135/5000000: episode: 7375, duration: 1.371s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 3.107 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.002094, mean_absolute_error: 0.355791, mean_q: -0.404239
  444167/5000000: episode: 7376, duration: 1.581s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.031 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002198, mean_absolute_error: 0.355541, mean_q: -0.404424
  444192/5000000: episode: 7377, duration: 1.231s, episode steps: 25, steps per second: 20, episode reward: -1.000, me

  445688/5000000: episode: 7402, duration: 1.231s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.720 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001974, mean_absolute_error: 0.352770, mean_q: -0.400196
  445714/5000000: episode: 7403, duration: 1.198s, episode steps: 26, steps per second: 22, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.038 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.002118, mean_absolute_error: 0.352726, mean_q: -0.398738
  445783/5000000: episode: 7404, duration: 3.323s, episode steps: 69, steps per second: 21, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.623 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002116, mean_absolute_error: 0.352004, mean_q: -0.398044
  445819/5000000: episode: 7405, duration: 1.788s, episode steps: 36, steps per second: 20, episode reward: -1.000, me

  446978/5000000: episode: 7430, duration: 1.240s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.885 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002040, mean_absolute_error: 0.349548, mean_q: -0.395990
  447007/5000000: episode: 7431, duration: 1.472s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001915, mean_absolute_error: 0.347273, mean_q: -0.392441
  447032/5000000: episode: 7432, duration: 1.306s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.840 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002135, mean_absolute_error: 0.347889, mean_q: -0.393902
  447141/5000000: episode: 7433, duration: 5.187s, episode steps: 109, steps per second: 21, episode reward: -1.000, m

  448774/5000000: episode: 7458, duration: 5.390s, episode steps: 113, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.965 [0.000, 5.000], mean observation: 0.058 [0.000, 24.000], loss: 0.002167, mean_absolute_error: 0.353612, mean_q: -0.401310
  448800/5000000: episode: 7459, duration: 1.315s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.002204, mean_absolute_error: 0.354440, mean_q: -0.400300
  448871/5000000: episode: 7460, duration: 3.345s, episode steps: 71, steps per second: 21, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.662 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.002173, mean_absolute_error: 0.353886, mean_q: -0.400914
  448956/5000000: episode: 7461, duration: 4.048s, episode steps: 85, steps per second: 21, episode reward: -1.000, m

  450663/5000000: episode: 7486, duration: 1.412s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.002175, mean_absolute_error: 0.362312, mean_q: -0.413576
  450694/5000000: episode: 7487, duration: 1.577s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.002003, mean_absolute_error: 0.364164, mean_q: -0.416164
  450804/5000000: episode: 7488, duration: 5.243s, episode steps: 110, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.545 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.002058, mean_absolute_error: 0.361186, mean_q: -0.411895
  450833/5000000: episode: 7489, duration: 1.380s, episode steps: 29, steps per second: 21, episode reward: -1.000, m

  452260/5000000: episode: 7514, duration: 1.814s, episode steps: 37, steps per second: 20, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.378 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001988, mean_absolute_error: 0.368415, mean_q: -0.422195
  452315/5000000: episode: 7515, duration: 2.503s, episode steps: 55, steps per second: 22, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.255 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001997, mean_absolute_error: 0.368362, mean_q: -0.422908
  452348/5000000: episode: 7516, duration: 1.705s, episode steps: 33, steps per second: 19, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.758 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001967, mean_absolute_error: 0.368725, mean_q: -0.423054
  452513/5000000: episode: 7517, duration: 7.914s, episode steps: 165, steps per second: 21, episode reward: -1.000, m

  454242/5000000: episode: 7542, duration: 8.624s, episode steps: 198, steps per second: 23, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.530 [0.000, 5.000], mean observation: 0.060 [0.000, 24.000], loss: 0.001978, mean_absolute_error: 0.379454, mean_q: -0.438245
  454272/5000000: episode: 7543, duration: 1.523s, episode steps: 30, steps per second: 20, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.367 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001847, mean_absolute_error: 0.381247, mean_q: -0.440462
  454305/5000000: episode: 7544, duration: 1.625s, episode steps: 33, steps per second: 20, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.364 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001873, mean_absolute_error: 0.378906, mean_q: -0.438273
  454450/5000000: episode: 7545, duration: 6.964s, episode steps: 145, steps per second: 21, episode reward: -1.000, 

  456206/5000000: episode: 7570, duration: 1.258s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001940, mean_absolute_error: 0.390718, mean_q: -0.451977
  456285/5000000: episode: 7571, duration: 3.578s, episode steps: 79, steps per second: 22, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.443 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001933, mean_absolute_error: 0.388683, mean_q: -0.450070
  456347/5000000: episode: 7572, duration: 2.953s, episode steps: 62, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.726 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001845, mean_absolute_error: 0.388953, mean_q: -0.451091
  456374/5000000: episode: 7573, duration: 1.379s, episode steps: 27, steps per second: 20, episode reward: -1.000, me

  457587/5000000: episode: 7598, duration: 2.662s, episode steps: 56, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.839 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001850, mean_absolute_error: 0.391580, mean_q: -0.453841
  457614/5000000: episode: 7599, duration: 1.404s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001752, mean_absolute_error: 0.391512, mean_q: -0.453953
  457686/5000000: episode: 7600, duration: 3.397s, episode steps: 72, steps per second: 21, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.569 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001888, mean_absolute_error: 0.393381, mean_q: -0.456767
  457776/5000000: episode: 7601, duration: 4.444s, episode steps: 90, steps per second: 20, episode reward: -1.000, me

  459210/5000000: episode: 7626, duration: 4.284s, episode steps: 90, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.778 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001777, mean_absolute_error: 0.398564, mean_q: -0.463904
  459271/5000000: episode: 7627, duration: 2.959s, episode steps: 61, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.098 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001751, mean_absolute_error: 0.397871, mean_q: -0.462692
  459341/5000000: episode: 7628, duration: 3.386s, episode steps: 70, steps per second: 21, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.657 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001698, mean_absolute_error: 0.398621, mean_q: -0.463480
  459398/5000000: episode: 7629, duration: 2.766s, episode steps: 57, steps per second: 21, episode reward: -1.000, me

  460962/5000000: episode: 7654, duration: 1.167s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001762, mean_absolute_error: 0.403436, mean_q: -0.470094
  460989/5000000: episode: 7655, duration: 1.334s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.185 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001750, mean_absolute_error: 0.404990, mean_q: -0.472756
  461120/5000000: episode: 7656, duration: 6.245s, episode steps: 131, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.412 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001659, mean_absolute_error: 0.406826, mean_q: -0.474363
  461145/5000000: episode: 7657, duration: 1.212s, episode steps: 25, steps per second: 21, episode reward: -1.000, m

  462426/5000000: episode: 7682, duration: 4.747s, episode steps: 97, steps per second: 20, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.268 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001666, mean_absolute_error: 0.411778, mean_q: -0.482043
  462522/5000000: episode: 7683, duration: 4.578s, episode steps: 96, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.479 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001703, mean_absolute_error: 0.412320, mean_q: -0.482117
  462548/5000000: episode: 7684, duration: 1.206s, episode steps: 26, steps per second: 22, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.462 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001647, mean_absolute_error: 0.411846, mean_q: -0.481318
  462575/5000000: episode: 7685, duration: 1.328s, episode steps: 27, steps per second: 20, episode reward: -1.000, me

  464256/5000000: episode: 7710, duration: 5.996s, episode steps: 125, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.504 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001667, mean_absolute_error: 0.415073, mean_q: -0.485812
  464286/5000000: episode: 7711, duration: 1.437s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.067 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001579, mean_absolute_error: 0.416469, mean_q: -0.487823
  464406/5000000: episode: 7712, duration: 5.429s, episode steps: 120, steps per second: 22, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.425 [0.000, 5.000], mean observation: 0.059 [0.000, 24.000], loss: 0.001681, mean_absolute_error: 0.415598, mean_q: -0.486740
  464431/5000000: episode: 7713, duration: 1.117s, episode steps: 25, steps per second: 22, episode reward: -1.000, 

  465698/5000000: episode: 7738, duration: 1.517s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.281 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001638, mean_absolute_error: 0.418018, mean_q: -0.490034
  465899/5000000: episode: 7739, duration: 9.861s, episode steps: 201, steps per second: 20, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.597 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001567, mean_absolute_error: 0.416856, mean_q: -0.488426
  465924/5000000: episode: 7740, duration: 1.327s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001505, mean_absolute_error: 0.417231, mean_q: -0.488819
  466096/5000000: episode: 7741, duration: 8.607s, episode steps: 172, steps per second: 20, episode reward: -1.000, 

  467391/5000000: episode: 7766, duration: 1.222s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.720 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001481, mean_absolute_error: 0.419803, mean_q: -0.492287
  467417/5000000: episode: 7767, duration: 1.353s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.923 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001536, mean_absolute_error: 0.417933, mean_q: -0.488923
  467442/5000000: episode: 7768, duration: 1.220s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.200 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001602, mean_absolute_error: 0.417696, mean_q: -0.489786
  467467/5000000: episode: 7769, duration: 1.270s, episode steps: 25, steps per second: 20, episode reward: -1.000, me

  468718/5000000: episode: 7794, duration: 1.136s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.160 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001636, mean_absolute_error: 0.422190, mean_q: -0.495025
  468827/5000000: episode: 7795, duration: 5.352s, episode steps: 109, steps per second: 20, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.550 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001653, mean_absolute_error: 0.421763, mean_q: -0.494744
  468883/5000000: episode: 7796, duration: 2.683s, episode steps: 56, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.625 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001624, mean_absolute_error: 0.422045, mean_q: -0.496413
  468941/5000000: episode: 7797, duration: 2.881s, episode steps: 58, steps per second: 20, episode reward: -1.000, m

  470807/5000000: episode: 7822, duration: 1.236s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.923 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001476, mean_absolute_error: 0.421037, mean_q: -0.493331
  470844/5000000: episode: 7823, duration: 1.810s, episode steps: 37, steps per second: 20, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.622 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001570, mean_absolute_error: 0.423726, mean_q: -0.496166
  470968/5000000: episode: 7824, duration: 6.189s, episode steps: 124, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.435 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001508, mean_absolute_error: 0.423598, mean_q: -0.496253
  470993/5000000: episode: 7825, duration: 1.253s, episode steps: 25, steps per second: 20, episode reward: -1.000, m

  472513/5000000: episode: 7850, duration: 3.838s, episode steps: 80, steps per second: 21, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.825 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001568, mean_absolute_error: 0.422328, mean_q: -0.494934
  472544/5000000: episode: 7851, duration: 1.457s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.806 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001520, mean_absolute_error: 0.423642, mean_q: -0.495964
  472571/5000000: episode: 7852, duration: 1.412s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.963 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001470, mean_absolute_error: 0.421960, mean_q: -0.493676
  472608/5000000: episode: 7853, duration: 1.905s, episode steps: 37, steps per second: 19, episode reward: -1.000, me

  474418/5000000: episode: 7878, duration: 7.896s, episode steps: 158, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.557 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001569, mean_absolute_error: 0.421474, mean_q: -0.494277
  474478/5000000: episode: 7879, duration: 2.948s, episode steps: 60, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001501, mean_absolute_error: 0.420593, mean_q: -0.493380
  474597/5000000: episode: 7880, duration: 5.534s, episode steps: 119, steps per second: 22, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.370 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001480, mean_absolute_error: 0.421291, mean_q: -0.494202
  474661/5000000: episode: 7881, duration: 3.058s, episode steps: 64, steps per second: 21, episode reward: -1.000, 

  475975/5000000: episode: 7906, duration: 1.309s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.920 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001539, mean_absolute_error: 0.421829, mean_q: -0.495343
  476007/5000000: episode: 7907, duration: 1.667s, episode steps: 32, steps per second: 19, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.656 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001765, mean_absolute_error: 0.425766, mean_q: -0.499160
  476032/5000000: episode: 7908, duration: 1.214s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.520 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001449, mean_absolute_error: 0.423437, mean_q: -0.496591
  476090/5000000: episode: 7909, duration: 2.780s, episode steps: 58, steps per second: 21, episode reward: -1.000, me

  477832/5000000: episode: 7934, duration: 1.537s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.774 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001620, mean_absolute_error: 0.423061, mean_q: -0.496704
  477858/5000000: episode: 7935, duration: 1.248s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.923 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001555, mean_absolute_error: 0.424736, mean_q: -0.497724
  477885/5000000: episode: 7936, duration: 1.371s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001641, mean_absolute_error: 0.422373, mean_q: -0.494979
  478031/5000000: episode: 7937, duration: 6.580s, episode steps: 146, steps per second: 22, episode reward: -1.000, m

  479723/5000000: episode: 7962, duration: 4.609s, episode steps: 95, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.368 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001592, mean_absolute_error: 0.423954, mean_q: -0.496854
  479751/5000000: episode: 7963, duration: 1.278s, episode steps: 28, steps per second: 22, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.786 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001575, mean_absolute_error: 0.420028, mean_q: -0.492417
  479826/5000000: episode: 7964, duration: 3.668s, episode steps: 75, steps per second: 20, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.653 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001523, mean_absolute_error: 0.422924, mean_q: -0.494954
  479851/5000000: episode: 7965, duration: 1.274s, episode steps: 25, steps per second: 20, episode reward: -1.000, me

  481403/5000000: episode: 7990, duration: 3.228s, episode steps: 72, steps per second: 22, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.361 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001557, mean_absolute_error: 0.420768, mean_q: -0.492625
  481443/5000000: episode: 7991, duration: 1.921s, episode steps: 40, steps per second: 21, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.275 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001599, mean_absolute_error: 0.421779, mean_q: -0.493772
  481605/5000000: episode: 7992, duration: 7.791s, episode steps: 162, steps per second: 21, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.383 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001623, mean_absolute_error: 0.420242, mean_q: -0.491849
  481634/5000000: episode: 7993, duration: 1.409s, episode steps: 29, steps per second: 21, episode reward: -1.000, m

  482968/5000000: episode: 8018, duration: 1.269s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.462 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001601, mean_absolute_error: 0.420999, mean_q: -0.493036
  483020/5000000: episode: 8019, duration: 2.557s, episode steps: 52, steps per second: 20, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.077 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001591, mean_absolute_error: 0.419092, mean_q: -0.490793
  483104/5000000: episode: 8020, duration: 3.729s, episode steps: 84, steps per second: 23, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001607, mean_absolute_error: 0.421006, mean_q: -0.492602
  483345/5000000: episode: 8021, duration: 11.573s, episode steps: 241, steps per second: 21, episode reward: -1.000, 

  484738/5000000: episode: 8046, duration: 1.554s, episode steps: 29, steps per second: 19, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.552 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001585, mean_absolute_error: 0.422889, mean_q: -0.495378
  484776/5000000: episode: 8047, duration: 1.839s, episode steps: 38, steps per second: 21, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.395 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001493, mean_absolute_error: 0.425519, mean_q: -0.498378
  484808/5000000: episode: 8048, duration: 1.518s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.469 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001508, mean_absolute_error: 0.424059, mean_q: -0.497347
  484863/5000000: episode: 8049, duration: 2.728s, episode steps: 55, steps per second: 20, episode reward: -1.000, me

  486171/5000000: episode: 8074, duration: 1.381s, episode steps: 29, steps per second: 21, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001639, mean_absolute_error: 0.427423, mean_q: -0.501287
  486197/5000000: episode: 8075, duration: 1.304s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.462 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001566, mean_absolute_error: 0.428393, mean_q: -0.502559
  486226/5000000: episode: 8076, duration: 1.603s, episode steps: 29, steps per second: 18, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.828 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001506, mean_absolute_error: 0.427241, mean_q: -0.501656
  486272/5000000: episode: 8077, duration: 2.113s, episode steps: 46, steps per second: 22, episode reward: -1.000, me

  487647/5000000: episode: 8102, duration: 1.273s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.731 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001573, mean_absolute_error: 0.431769, mean_q: -0.506231
  487720/5000000: episode: 8103, duration: 3.584s, episode steps: 73, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.603 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001552, mean_absolute_error: 0.431292, mean_q: -0.505861
  487754/5000000: episode: 8104, duration: 1.630s, episode steps: 34, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.559 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001566, mean_absolute_error: 0.430071, mean_q: -0.504313
  487793/5000000: episode: 8105, duration: 1.979s, episode steps: 39, steps per second: 20, episode reward: -1.000, me

  489216/5000000: episode: 8130, duration: 6.695s, episode steps: 139, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.468 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001548, mean_absolute_error: 0.429037, mean_q: -0.502770
  489301/5000000: episode: 8131, duration: 4.266s, episode steps: 85, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.271 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001585, mean_absolute_error: 0.429118, mean_q: -0.502568
  489334/5000000: episode: 8132, duration: 1.668s, episode steps: 33, steps per second: 20, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001549, mean_absolute_error: 0.428380, mean_q: -0.501812
  489398/5000000: episode: 8133, duration: 2.985s, episode steps: 64, steps per second: 21, episode reward: -1.000, m

  491338/5000000: episode: 8158, duration: 4.514s, episode steps: 94, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.309 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001571, mean_absolute_error: 0.429946, mean_q: -0.504314
  491380/5000000: episode: 8159, duration: 2.095s, episode steps: 42, steps per second: 20, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.524 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001649, mean_absolute_error: 0.428550, mean_q: -0.501058
  491405/5000000: episode: 8160, duration: 1.230s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.160 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001539, mean_absolute_error: 0.429544, mean_q: -0.502414
  491609/5000000: episode: 8161, duration: 10.108s, episode steps: 204, steps per second: 20, episode reward: -1.000, 

  492816/5000000: episode: 8186, duration: 3.046s, episode steps: 65, steps per second: 21, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.154 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001549, mean_absolute_error: 0.434084, mean_q: -0.509863
  492872/5000000: episode: 8187, duration: 2.653s, episode steps: 56, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.446 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001507, mean_absolute_error: 0.432879, mean_q: -0.507805
  493093/5000000: episode: 8188, duration: 10.193s, episode steps: 221, steps per second: 22, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.376 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001546, mean_absolute_error: 0.432964, mean_q: -0.508266
  493161/5000000: episode: 8189, duration: 3.234s, episode steps: 68, steps per second: 21, episode reward: -1.000, 

  494589/5000000: episode: 8214, duration: 1.633s, episode steps: 34, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.441 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001495, mean_absolute_error: 0.432592, mean_q: -0.508340
  494726/5000000: episode: 8215, duration: 6.552s, episode steps: 137, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.679 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001509, mean_absolute_error: 0.432642, mean_q: -0.508119
  494757/5000000: episode: 8216, duration: 1.639s, episode steps: 31, steps per second: 19, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 3.194 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001545, mean_absolute_error: 0.432026, mean_q: -0.506646
  494785/5000000: episode: 8217, duration: 1.387s, episode steps: 28, steps per second: 20, episode reward: -1.000, m

  496617/5000000: episode: 8242, duration: 2.398s, episode steps: 50, steps per second: 21, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.300 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001490, mean_absolute_error: 0.430416, mean_q: -0.505615
  496643/5000000: episode: 8243, duration: 1.408s, episode steps: 26, steps per second: 18, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.462 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001589, mean_absolute_error: 0.432958, mean_q: -0.508765
  496682/5000000: episode: 8244, duration: 1.904s, episode steps: 39, steps per second: 20, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.308 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001632, mean_absolute_error: 0.432878, mean_q: -0.506894
  496858/5000000: episode: 8245, duration: 8.503s, episode steps: 176, steps per second: 21, episode reward: -1.000, m

  498336/5000000: episode: 8270, duration: 2.614s, episode steps: 53, steps per second: 20, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.698 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001523, mean_absolute_error: 0.425776, mean_q: -0.499679
  498462/5000000: episode: 8271, duration: 6.075s, episode steps: 126, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.595 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001479, mean_absolute_error: 0.426484, mean_q: -0.500694
  498545/5000000: episode: 8272, duration: 4.212s, episode steps: 83, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.325 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001539, mean_absolute_error: 0.426242, mean_q: -0.498374
  498590/5000000: episode: 8273, duration: 2.113s, episode steps: 45, steps per second: 21, episode reward: -1.000, m

  500325/5000000: episode: 8298, duration: 4.192s, episode steps: 87, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.529 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001565, mean_absolute_error: 0.420037, mean_q: -0.492676
  500411/5000000: episode: 8299, duration: 4.358s, episode steps: 86, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.453 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001561, mean_absolute_error: 0.419652, mean_q: -0.491715
  500509/5000000: episode: 8300, duration: 4.656s, episode steps: 98, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.255 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001610, mean_absolute_error: 0.419941, mean_q: -0.492322
  500654/5000000: episode: 8301, duration: 6.589s, episode steps: 145, steps per second: 22, episode reward: -1.000, m

  502372/5000000: episode: 8326, duration: 1.537s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.613 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001660, mean_absolute_error: 0.414440, mean_q: -0.484015
  502502/5000000: episode: 8327, duration: 6.414s, episode steps: 130, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.546 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.001667, mean_absolute_error: 0.413566, mean_q: -0.483859
  502527/5000000: episode: 8328, duration: 1.305s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001528, mean_absolute_error: 0.413886, mean_q: -0.484818
  502581/5000000: episode: 8329, duration: 2.586s, episode steps: 54, steps per second: 21, episode reward: -1.000, m

  504051/5000000: episode: 8354, duration: 1.468s, episode steps: 28, steps per second: 19, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.393 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001561, mean_absolute_error: 0.412122, mean_q: -0.482421
  504131/5000000: episode: 8355, duration: 3.883s, episode steps: 80, steps per second: 21, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.837 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001570, mean_absolute_error: 0.415568, mean_q: -0.487428
  504196/5000000: episode: 8356, duration: 3.279s, episode steps: 65, steps per second: 20, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.785 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001611, mean_absolute_error: 0.415648, mean_q: -0.486407
  504242/5000000: episode: 8357, duration: 2.211s, episode steps: 46, steps per second: 21, episode reward: -1.000, me

  506222/5000000: episode: 8382, duration: 1.494s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.581 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001585, mean_absolute_error: 0.416063, mean_q: -0.488981
  506444/5000000: episode: 8383, duration: 10.989s, episode steps: 222, steps per second: 20, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.541 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001594, mean_absolute_error: 0.416647, mean_q: -0.489410
  506474/5000000: episode: 8384, duration: 1.529s, episode steps: 30, steps per second: 20, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.900 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001670, mean_absolute_error: 0.418919, mean_q: -0.491958
  506527/5000000: episode: 8385, duration: 2.525s, episode steps: 53, steps per second: 21, episode reward: -1.000, 

  507794/5000000: episode: 8410, duration: 1.236s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001575, mean_absolute_error: 0.416779, mean_q: -0.489603
  507820/5000000: episode: 8411, duration: 1.306s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.385 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001621, mean_absolute_error: 0.415676, mean_q: -0.487912
  507857/5000000: episode: 8412, duration: 1.705s, episode steps: 37, steps per second: 22, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.811 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001484, mean_absolute_error: 0.417272, mean_q: -0.490261
  508015/5000000: episode: 8413, duration: 7.731s, episode steps: 158, steps per second: 20, episode reward: -1.000, m

  510026/5000000: episode: 8438, duration: 8.023s, episode steps: 163, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.607 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001513, mean_absolute_error: 0.422495, mean_q: -0.496606
  510106/5000000: episode: 8439, duration: 3.965s, episode steps: 80, steps per second: 20, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.400 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.001544, mean_absolute_error: 0.422260, mean_q: -0.496164
  510133/5000000: episode: 8440, duration: 1.262s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.778 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001510, mean_absolute_error: 0.420653, mean_q: -0.493509
  510195/5000000: episode: 8441, duration: 2.991s, episode steps: 62, steps per second: 21, episode reward: -1.000, m

  512039/5000000: episode: 8466, duration: 2.193s, episode steps: 29, steps per second: 13, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.103 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001575, mean_absolute_error: 0.425517, mean_q: -0.499678
  512127/5000000: episode: 8467, duration: 7.211s, episode steps: 88, steps per second: 12, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.818 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001584, mean_absolute_error: 0.424525, mean_q: -0.498923
  512189/5000000: episode: 8468, duration: 5.057s, episode steps: 62, steps per second: 12, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.306 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001513, mean_absolute_error: 0.424439, mean_q: -0.498206
  512231/5000000: episode: 8469, duration: 3.534s, episode steps: 42, steps per second: 12, episode reward: -1.000, me

  513663/5000000: episode: 8494, duration: 2.593s, episode steps: 52, steps per second: 20, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.904 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001516, mean_absolute_error: 0.424980, mean_q: -0.500240
  513691/5000000: episode: 8495, duration: 1.374s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.821 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001578, mean_absolute_error: 0.424419, mean_q: -0.498945
  513808/5000000: episode: 8496, duration: 5.736s, episode steps: 117, steps per second: 20, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.564 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001565, mean_absolute_error: 0.425666, mean_q: -0.500022
  513924/5000000: episode: 8497, duration: 5.519s, episode steps: 116, steps per second: 21, episode reward: -1.000, 

  515268/5000000: episode: 8522, duration: 1.686s, episode steps: 34, steps per second: 20, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001598, mean_absolute_error: 0.426141, mean_q: -0.501212
  515323/5000000: episode: 8523, duration: 2.618s, episode steps: 55, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.200 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001421, mean_absolute_error: 0.423224, mean_q: -0.498078
  515351/5000000: episode: 8524, duration: 1.446s, episode steps: 28, steps per second: 19, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 3.286 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001523, mean_absolute_error: 0.422768, mean_q: -0.497546
  515408/5000000: episode: 8525, duration: 2.940s, episode steps: 57, steps per second: 19, episode reward: -1.000, me

  516741/5000000: episode: 8550, duration: 4.692s, episode steps: 105, steps per second: 22, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.619 [0.000, 5.000], mean observation: 0.059 [0.000, 24.000], loss: 0.001442, mean_absolute_error: 0.422922, mean_q: -0.497743
  516774/5000000: episode: 8551, duration: 1.592s, episode steps: 33, steps per second: 21, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.091 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001548, mean_absolute_error: 0.421733, mean_q: -0.495442
  516901/5000000: episode: 8552, duration: 6.017s, episode steps: 127, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.520 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001544, mean_absolute_error: 0.421350, mean_q: -0.495501
  516932/5000000: episode: 8553, duration: 1.532s, episode steps: 31, steps per second: 20, episode reward: -1.000, 

  518314/5000000: episode: 8578, duration: 1.925s, episode steps: 40, steps per second: 21, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.450 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001546, mean_absolute_error: 0.421057, mean_q: -0.494748
  518363/5000000: episode: 8579, duration: 2.291s, episode steps: 49, steps per second: 21, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.143 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001549, mean_absolute_error: 0.422279, mean_q: -0.496389
  518462/5000000: episode: 8580, duration: 4.642s, episode steps: 99, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.626 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001492, mean_absolute_error: 0.421624, mean_q: -0.494903
  518488/5000000: episode: 8581, duration: 1.249s, episode steps: 26, steps per second: 21, episode reward: -1.000, me

  520023/5000000: episode: 8606, duration: 4.274s, episode steps: 83, steps per second: 19, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.627 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001567, mean_absolute_error: 0.417430, mean_q: -0.489079
  520054/5000000: episode: 8607, duration: 1.649s, episode steps: 31, steps per second: 19, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.355 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001516, mean_absolute_error: 0.416499, mean_q: -0.488585
  520085/5000000: episode: 8608, duration: 1.608s, episode steps: 31, steps per second: 19, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001542, mean_absolute_error: 0.415502, mean_q: -0.487549
  520110/5000000: episode: 8609, duration: 1.315s, episode steps: 25, steps per second: 19, episode reward: -1.000, me

  521584/5000000: episode: 8634, duration: 5.720s, episode steps: 115, steps per second: 20, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.574 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001564, mean_absolute_error: 0.415146, mean_q: -0.487100
  521679/5000000: episode: 8635, duration: 4.753s, episode steps: 95, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.442 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001605, mean_absolute_error: 0.414798, mean_q: -0.485587
  521715/5000000: episode: 8636, duration: 1.840s, episode steps: 36, steps per second: 20, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.583 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001549, mean_absolute_error: 0.415122, mean_q: -0.487156
  521842/5000000: episode: 8637, duration: 6.185s, episode steps: 127, steps per second: 21, episode reward: -1.000, 

  523438/5000000: episode: 8662, duration: 2.421s, episode steps: 50, steps per second: 21, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.120 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001520, mean_absolute_error: 0.410757, mean_q: -0.480670
  523539/5000000: episode: 8663, duration: 4.907s, episode steps: 101, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.178 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001648, mean_absolute_error: 0.412031, mean_q: -0.482585
  523569/5000000: episode: 8664, duration: 1.510s, episode steps: 30, steps per second: 20, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001625, mean_absolute_error: 0.410554, mean_q: -0.479969
  523600/5000000: episode: 8665, duration: 1.504s, episode steps: 31, steps per second: 21, episode reward: -1.000, m

  525119/5000000: episode: 8690, duration: 1.364s, episode steps: 29, steps per second: 21, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.793 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001594, mean_absolute_error: 0.405343, mean_q: -0.474711
  525148/5000000: episode: 8691, duration: 1.322s, episode steps: 29, steps per second: 22, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.414 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001630, mean_absolute_error: 0.405781, mean_q: -0.474060
  525184/5000000: episode: 8692, duration: 1.677s, episode steps: 36, steps per second: 21, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.806 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001593, mean_absolute_error: 0.406916, mean_q: -0.475897
  525244/5000000: episode: 8693, duration: 2.749s, episode steps: 60, steps per second: 22, episode reward: -1.000, me

  526782/5000000: episode: 8718, duration: 4.183s, episode steps: 88, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.227 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001710, mean_absolute_error: 0.398323, mean_q: -0.465340
  526807/5000000: episode: 8719, duration: 1.233s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.200 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001795, mean_absolute_error: 0.395520, mean_q: -0.460436
  526870/5000000: episode: 8720, duration: 2.912s, episode steps: 63, steps per second: 22, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.381 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001801, mean_absolute_error: 0.397762, mean_q: -0.463728
  526901/5000000: episode: 8721, duration: 1.505s, episode steps: 31, steps per second: 21, episode reward: -1.000, me

  528100/5000000: episode: 8746, duration: 1.315s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.704 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001719, mean_absolute_error: 0.396871, mean_q: -0.464540
  528134/5000000: episode: 8747, duration: 1.752s, episode steps: 34, steps per second: 19, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.265 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001738, mean_absolute_error: 0.400704, mean_q: -0.469078
  528176/5000000: episode: 8748, duration: 2.054s, episode steps: 42, steps per second: 20, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.405 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001679, mean_absolute_error: 0.397069, mean_q: -0.463884
  528207/5000000: episode: 8749, duration: 1.526s, episode steps: 31, steps per second: 20, episode reward: -1.000, me

  530044/5000000: episode: 8774, duration: 2.637s, episode steps: 56, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.125 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001755, mean_absolute_error: 0.398228, mean_q: -0.464622
  530072/5000000: episode: 8775, duration: 1.456s, episode steps: 28, steps per second: 19, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.286 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001866, mean_absolute_error: 0.396790, mean_q: -0.463108
  530099/5000000: episode: 8776, duration: 1.358s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 1.889 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001694, mean_absolute_error: 0.396109, mean_q: -0.463871
  530127/5000000: episode: 8777, duration: 1.333s, episode steps: 28, steps per second: 21, episode reward: -1.000, me

  531567/5000000: episode: 8802, duration: 3.360s, episode steps: 67, steps per second: 20, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.179 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001711, mean_absolute_error: 0.395501, mean_q: -0.462130
  531593/5000000: episode: 8803, duration: 1.263s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.923 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001840, mean_absolute_error: 0.396883, mean_q: -0.465009
  531630/5000000: episode: 8804, duration: 1.760s, episode steps: 37, steps per second: 21, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.865 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001713, mean_absolute_error: 0.397450, mean_q: -0.464602
  531670/5000000: episode: 8805, duration: 1.900s, episode steps: 40, steps per second: 21, episode reward: -1.000, me

  533461/5000000: episode: 8830, duration: 1.364s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.385 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001739, mean_absolute_error: 0.391330, mean_q: -0.456838
  533516/5000000: episode: 8831, duration: 2.667s, episode steps: 55, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.982 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001717, mean_absolute_error: 0.389076, mean_q: -0.453662
  533544/5000000: episode: 8832, duration: 1.415s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.107 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001791, mean_absolute_error: 0.388398, mean_q: -0.451824
  533658/5000000: episode: 8833, duration: 5.544s, episode steps: 114, steps per second: 21, episode reward: -1.000, m

  534942/5000000: episode: 8858, duration: 1.574s, episode steps: 30, steps per second: 19, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.233 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001908, mean_absolute_error: 0.387594, mean_q: -0.452740
  534971/5000000: episode: 8859, duration: 1.406s, episode steps: 29, steps per second: 21, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.724 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001673, mean_absolute_error: 0.389275, mean_q: -0.453845
  535032/5000000: episode: 8860, duration: 13.204s, episode steps: 61, steps per second: 5, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.656 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001724, mean_absolute_error: 0.387800, mean_q: -0.452091
  535146/5000000: episode: 8861, duration: 5.643s, episode steps: 114, steps per second: 20, episode reward: -1.000, m

  536861/5000000: episode: 8886, duration: 1.815s, episode steps: 38, steps per second: 21, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.526 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001739, mean_absolute_error: 0.386708, mean_q: -0.452105
  536888/5000000: episode: 8887, duration: 1.427s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 3.037 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001758, mean_absolute_error: 0.385640, mean_q: -0.449685
  536915/5000000: episode: 8888, duration: 1.321s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.148 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001692, mean_absolute_error: 0.385197, mean_q: -0.450238
  536940/5000000: episode: 8889, duration: 1.215s, episode steps: 25, steps per second: 21, episode reward: -1.000, me

  538144/5000000: episode: 8914, duration: 1.623s, episode steps: 33, steps per second: 20, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.879 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001642, mean_absolute_error: 0.392960, mean_q: -0.460247
  538231/5000000: episode: 8915, duration: 4.352s, episode steps: 87, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.793 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001773, mean_absolute_error: 0.392545, mean_q: -0.458584
  538256/5000000: episode: 8916, duration: 1.257s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.720 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001733, mean_absolute_error: 0.391851, mean_q: -0.458207
  538309/5000000: episode: 8917, duration: 2.529s, episode steps: 53, steps per second: 21, episode reward: -1.000, me

  539805/5000000: episode: 8942, duration: 4.297s, episode steps: 91, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.429 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001664, mean_absolute_error: 0.394423, mean_q: -0.460984
  539899/5000000: episode: 8943, duration: 4.762s, episode steps: 94, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.383 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001698, mean_absolute_error: 0.394977, mean_q: -0.462538
  539933/5000000: episode: 8944, duration: 1.652s, episode steps: 34, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.176 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001653, mean_absolute_error: 0.394830, mean_q: -0.462438
  539961/5000000: episode: 8945, duration: 1.300s, episode steps: 28, steps per second: 22, episode reward: -1.000, me

  541111/5000000: episode: 8970, duration: 1.483s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.379 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001721, mean_absolute_error: 0.399260, mean_q: -0.467645
  541136/5000000: episode: 8971, duration: 1.379s, episode steps: 25, steps per second: 18, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.040 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001674, mean_absolute_error: 0.402687, mean_q: -0.472008
  541161/5000000: episode: 8972, duration: 1.190s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001686, mean_absolute_error: 0.399370, mean_q: -0.466571
  541284/5000000: episode: 8973, duration: 6.103s, episode steps: 123, steps per second: 20, episode reward: -1.000, m

  542388/5000000: episode: 8998, duration: 2.080s, episode steps: 41, steps per second: 20, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.732 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001688, mean_absolute_error: 0.408624, mean_q: -0.479400
  542472/5000000: episode: 8999, duration: 4.048s, episode steps: 84, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.476 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001612, mean_absolute_error: 0.406880, mean_q: -0.476590
  542526/5000000: episode: 9000, duration: 2.461s, episode steps: 54, steps per second: 22, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.611 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001613, mean_absolute_error: 0.406817, mean_q: -0.477026
  542595/5000000: episode: 9001, duration: 3.692s, episode steps: 69, steps per second: 19, episode reward: -1.000, me

  543911/5000000: episode: 9026, duration: 3.492s, episode steps: 70, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.443 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001700, mean_absolute_error: 0.409554, mean_q: -0.479990
  543936/5000000: episode: 9027, duration: 1.185s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.440 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001682, mean_absolute_error: 0.410799, mean_q: -0.482532
  544024/5000000: episode: 9028, duration: 4.296s, episode steps: 88, steps per second: 20, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.489 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001623, mean_absolute_error: 0.409813, mean_q: -0.481567
  544053/5000000: episode: 9029, duration: 1.402s, episode steps: 29, steps per second: 21, episode reward: -1.000, me

  545349/5000000: episode: 9054, duration: 1.571s, episode steps: 34, steps per second: 22, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.382 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001672, mean_absolute_error: 0.413385, mean_q: -0.484834
  545378/5000000: episode: 9055, duration: 1.422s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.966 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001638, mean_absolute_error: 0.416594, mean_q: -0.489816
  545433/5000000: episode: 9056, duration: 2.638s, episode steps: 55, steps per second: 21, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.691 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001618, mean_absolute_error: 0.414278, mean_q: -0.486858
  545495/5000000: episode: 9057, duration: 3.102s, episode steps: 62, steps per second: 20, episode reward: -1.000, me

  547306/5000000: episode: 9082, duration: 5.150s, episode steps: 107, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.430 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001554, mean_absolute_error: 0.419270, mean_q: -0.492148
  547383/5000000: episode: 9083, duration: 4.034s, episode steps: 77, steps per second: 19, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.545 [0.000, 5.000], mean observation: 0.078 [0.000, 24.000], loss: 0.001567, mean_absolute_error: 0.417602, mean_q: -0.489821
  547413/5000000: episode: 9084, duration: 1.541s, episode steps: 30, steps per second: 19, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.433 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001557, mean_absolute_error: 0.417737, mean_q: -0.489846
  547468/5000000: episode: 9085, duration: 2.637s, episode steps: 55, steps per second: 21, episode reward: -1.000, m

  548887/5000000: episode: 9110, duration: 1.219s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.240 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001607, mean_absolute_error: 0.421406, mean_q: -0.495447
  549014/5000000: episode: 9111, duration: 6.463s, episode steps: 127, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.291 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001560, mean_absolute_error: 0.420816, mean_q: -0.494058
  549072/5000000: episode: 9112, duration: 2.832s, episode steps: 58, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.414 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001557, mean_absolute_error: 0.419535, mean_q: -0.492986
  549139/5000000: episode: 9113, duration: 3.290s, episode steps: 67, steps per second: 20, episode reward: -1.000, m

  550403/5000000: episode: 9138, duration: 2.668s, episode steps: 52, steps per second: 19, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.712 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001636, mean_absolute_error: 0.421919, mean_q: -0.494915
  550430/5000000: episode: 9139, duration: 1.286s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.407 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001476, mean_absolute_error: 0.420608, mean_q: -0.492736
  550457/5000000: episode: 9140, duration: 1.372s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001519, mean_absolute_error: 0.420982, mean_q: -0.493655
  550542/5000000: episode: 9141, duration: 4.088s, episode steps: 85, steps per second: 21, episode reward: -1.000, me

  552335/5000000: episode: 9166, duration: 1.310s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.520 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001612, mean_absolute_error: 0.432421, mean_q: -0.506686
  552396/5000000: episode: 9167, duration: 2.945s, episode steps: 61, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.213 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001512, mean_absolute_error: 0.431229, mean_q: -0.506432
  552424/5000000: episode: 9168, duration: 1.383s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.857 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001623, mean_absolute_error: 0.432642, mean_q: -0.509202
  552452/5000000: episode: 9169, duration: 1.404s, episode steps: 28, steps per second: 20, episode reward: -1.000, me

  553775/5000000: episode: 9194, duration: 1.295s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.760 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001455, mean_absolute_error: 0.434645, mean_q: -0.511026
  553802/5000000: episode: 9195, duration: 1.308s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.556 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001505, mean_absolute_error: 0.434549, mean_q: -0.511595
  553833/5000000: episode: 9196, duration: 1.620s, episode steps: 31, steps per second: 19, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.097 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001572, mean_absolute_error: 0.435975, mean_q: -0.513296
  553871/5000000: episode: 9197, duration: 1.952s, episode steps: 38, steps per second: 19, episode reward: -1.000, me

  555247/5000000: episode: 9222, duration: 3.994s, episode steps: 83, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.723 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001562, mean_absolute_error: 0.439599, mean_q: -0.517369
  555303/5000000: episode: 9223, duration: 3.106s, episode steps: 56, steps per second: 18, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.429 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001585, mean_absolute_error: 0.442326, mean_q: -0.520853
  555388/5000000: episode: 9224, duration: 4.234s, episode steps: 85, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.435 [0.000, 5.000], mean observation: 0.075 [0.000, 24.000], loss: 0.001526, mean_absolute_error: 0.441544, mean_q: -0.519523
  555413/5000000: episode: 9225, duration: 1.342s, episode steps: 25, steps per second: 19, episode reward: -1.000, me

  556768/5000000: episode: 9250, duration: 2.888s, episode steps: 58, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.603 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001453, mean_absolute_error: 0.449015, mean_q: -0.527956
  556801/5000000: episode: 9251, duration: 1.574s, episode steps: 33, steps per second: 21, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.455 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001532, mean_absolute_error: 0.449331, mean_q: -0.528031
  556892/5000000: episode: 9252, duration: 4.151s, episode steps: 91, steps per second: 22, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.780 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001485, mean_absolute_error: 0.451174, mean_q: -0.530882
  556971/5000000: episode: 9253, duration: 3.932s, episode steps: 79, steps per second: 20, episode reward: -1.000, me

  558300/5000000: episode: 9278, duration: 3.988s, episode steps: 82, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.610 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001445, mean_absolute_error: 0.452787, mean_q: -0.533352
  558418/5000000: episode: 9279, duration: 5.827s, episode steps: 118, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.551 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001497, mean_absolute_error: 0.453211, mean_q: -0.532661
  558450/5000000: episode: 9280, duration: 1.550s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.438 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001479, mean_absolute_error: 0.453290, mean_q: -0.532300
  558481/5000000: episode: 9281, duration: 1.491s, episode steps: 31, steps per second: 21, episode reward: -1.000, m

  559935/5000000: episode: 9306, duration: 13.897s, episode steps: 281, steps per second: 20, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.630 [0.000, 5.000], mean observation: 0.061 [0.000, 24.000], loss: 0.001453, mean_absolute_error: 0.449065, mean_q: -0.528129
  560043/5000000: episode: 9307, duration: 5.333s, episode steps: 108, steps per second: 20, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001487, mean_absolute_error: 0.448591, mean_q: -0.527388
  560079/5000000: episode: 9308, duration: 1.794s, episode steps: 36, steps per second: 20, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.278 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001425, mean_absolute_error: 0.448259, mean_q: -0.527546
  560104/5000000: episode: 9309, duration: 1.278s, episode steps: 25, steps per second: 20, episode reward: -1.000,

  561619/5000000: episode: 9334, duration: 3.775s, episode steps: 75, steps per second: 20, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.280 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001469, mean_absolute_error: 0.452875, mean_q: -0.533201
  561664/5000000: episode: 9335, duration: 2.262s, episode steps: 45, steps per second: 20, episode reward: -1.000, mean reward: -0.022 [-1.000, 0.000], mean action: 2.244 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001510, mean_absolute_error: 0.453117, mean_q: -0.533664
  561784/5000000: episode: 9336, duration: 5.930s, episode steps: 120, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.475 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001422, mean_absolute_error: 0.450946, mean_q: -0.529813
  561810/5000000: episode: 9337, duration: 1.248s, episode steps: 26, steps per second: 21, episode reward: -1.000, m

  563085/5000000: episode: 9362, duration: 1.277s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.192 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001383, mean_absolute_error: 0.453669, mean_q: -0.535016
  563136/5000000: episode: 9363, duration: 2.607s, episode steps: 51, steps per second: 20, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.588 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001442, mean_absolute_error: 0.454737, mean_q: -0.535978
  563226/5000000: episode: 9364, duration: 4.004s, episode steps: 90, steps per second: 22, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.589 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001435, mean_absolute_error: 0.455489, mean_q: -0.536092
  563292/5000000: episode: 9365, duration: 3.250s, episode steps: 66, steps per second: 20, episode reward: -1.000, me

  566675/5000000: episode: 9429, duration: 1.203s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.080 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001335, mean_absolute_error: 0.454145, mean_q: -0.535360
  566774/5000000: episode: 9430, duration: 4.861s, episode steps: 99, steps per second: 20, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.232 [0.000, 5.000], mean observation: 0.061 [0.000, 24.000], loss: 0.001473, mean_absolute_error: 0.456811, mean_q: -0.538540
  566850/5000000: episode: 9431, duration: 3.830s, episode steps: 76, steps per second: 20, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.526 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001476, mean_absolute_error: 0.457242, mean_q: -0.538073
  566880/5000000: episode: 9432, duration: 1.615s, episode steps: 30, steps per second: 19, episode reward: -1.000, me

  568362/5000000: episode: 9457, duration: 1.380s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 3.179 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001406, mean_absolute_error: 0.455454, mean_q: -0.536235
  568388/5000000: episode: 9458, duration: 1.367s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001370, mean_absolute_error: 0.456781, mean_q: -0.538198
  568419/5000000: episode: 9459, duration: 1.418s, episode steps: 31, steps per second: 22, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.548 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001382, mean_absolute_error: 0.455916, mean_q: -0.537184
  568506/5000000: episode: 9460, duration: 4.104s, episode steps: 87, steps per second: 21, episode reward: -1.000, me

  569968/5000000: episode: 9485, duration: 3.729s, episode steps: 72, steps per second: 19, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.222 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001448, mean_absolute_error: 0.453943, mean_q: -0.534232
  569997/5000000: episode: 9486, duration: 1.490s, episode steps: 29, steps per second: 19, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.793 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001533, mean_absolute_error: 0.454776, mean_q: -0.535933
  570064/5000000: episode: 9487, duration: 3.150s, episode steps: 67, steps per second: 21, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.522 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001497, mean_absolute_error: 0.455398, mean_q: -0.536372
  570145/5000000: episode: 9488, duration: 4.085s, episode steps: 81, steps per second: 20, episode reward: -1.000, me

  571761/5000000: episode: 9513, duration: 2.519s, episode steps: 50, steps per second: 20, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.460 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001370, mean_absolute_error: 0.460157, mean_q: -0.542445
  571797/5000000: episode: 9514, duration: 1.849s, episode steps: 36, steps per second: 19, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.278 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001523, mean_absolute_error: 0.461661, mean_q: -0.544490
  571839/5000000: episode: 9515, duration: 2.062s, episode steps: 42, steps per second: 20, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.595 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001395, mean_absolute_error: 0.461367, mean_q: -0.543634
  571865/5000000: episode: 9516, duration: 1.187s, episode steps: 26, steps per second: 22, episode reward: -1.000, me

  572972/5000000: episode: 9541, duration: 1.295s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.808 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001351, mean_absolute_error: 0.463612, mean_q: -0.546677
  572998/5000000: episode: 9542, duration: 1.328s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 1.808 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001582, mean_absolute_error: 0.463965, mean_q: -0.546459
  573029/5000000: episode: 9543, duration: 1.554s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.387 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001487, mean_absolute_error: 0.466804, mean_q: -0.550117
  573087/5000000: episode: 9544, duration: 2.886s, episode steps: 58, steps per second: 20, episode reward: -1.000, me

  574746/5000000: episode: 9569, duration: 8.932s, episode steps: 181, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.276 [0.000, 5.000], mean observation: 0.061 [0.000, 24.000], loss: 0.001445, mean_absolute_error: 0.462698, mean_q: -0.544926
  574806/5000000: episode: 9570, duration: 2.961s, episode steps: 60, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.283 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001469, mean_absolute_error: 0.463548, mean_q: -0.546384
  574869/5000000: episode: 9571, duration: 3.039s, episode steps: 63, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.619 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001343, mean_absolute_error: 0.461386, mean_q: -0.543182
  574899/5000000: episode: 9572, duration: 1.520s, episode steps: 30, steps per second: 20, episode reward: -1.000, m

  576288/5000000: episode: 9597, duration: 2.126s, episode steps: 43, steps per second: 20, episode reward: -1.000, mean reward: -0.023 [-1.000, 0.000], mean action: 2.488 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001443, mean_absolute_error: 0.462162, mean_q: -0.544046
  576382/5000000: episode: 9598, duration: 4.536s, episode steps: 94, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.617 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001455, mean_absolute_error: 0.464863, mean_q: -0.546972
  576407/5000000: episode: 9599, duration: 1.312s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001619, mean_absolute_error: 0.467708, mean_q: -0.550885
  576463/5000000: episode: 9600, duration: 2.765s, episode steps: 56, steps per second: 20, episode reward: -1.000, me

  577835/5000000: episode: 9625, duration: 1.627s, episode steps: 34, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.765 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001386, mean_absolute_error: 0.463721, mean_q: -0.546500
  577947/5000000: episode: 9626, duration: 5.667s, episode steps: 112, steps per second: 20, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.420 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001418, mean_absolute_error: 0.465033, mean_q: -0.547784
  578002/5000000: episode: 9627, duration: 2.727s, episode steps: 55, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.018 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.001495, mean_absolute_error: 0.464158, mean_q: -0.545797
  578069/5000000: episode: 9628, duration: 3.457s, episode steps: 67, steps per second: 19, episode reward: -1.000, m

  579033/5000000: episode: 9653, duration: 1.418s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.767 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001619, mean_absolute_error: 0.466290, mean_q: -0.548400
  579058/5000000: episode: 9654, duration: 1.127s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.680 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001505, mean_absolute_error: 0.464729, mean_q: -0.547220
  579083/5000000: episode: 9655, duration: 1.257s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001384, mean_absolute_error: 0.465995, mean_q: -0.548434
  579342/5000000: episode: 9656, duration: 13.265s, episode steps: 259, steps per second: 20, episode reward: -1.000, 

  580631/5000000: episode: 9681, duration: 3.215s, episode steps: 62, steps per second: 19, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.855 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001419, mean_absolute_error: 0.463126, mean_q: -0.545088
  580796/5000000: episode: 9682, duration: 8.231s, episode steps: 165, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.648 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.001429, mean_absolute_error: 0.463981, mean_q: -0.546209
  580826/5000000: episode: 9683, duration: 1.432s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.233 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001516, mean_absolute_error: 0.465721, mean_q: -0.548111
  580899/5000000: episode: 9684, duration: 3.535s, episode steps: 73, steps per second: 21, episode reward: -1.000, m

  582307/5000000: episode: 9709, duration: 3.942s, episode steps: 82, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.793 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001411, mean_absolute_error: 0.462916, mean_q: -0.544968
  582335/5000000: episode: 9710, duration: 1.351s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.857 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001530, mean_absolute_error: 0.464355, mean_q: -0.546187
  582387/5000000: episode: 9711, duration: 2.678s, episode steps: 52, steps per second: 19, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.904 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001546, mean_absolute_error: 0.464595, mean_q: -0.545815
  582451/5000000: episode: 9712, duration: 3.305s, episode steps: 64, steps per second: 19, episode reward: -1.000, me

  583828/5000000: episode: 9737, duration: 4.286s, episode steps: 85, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.506 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001370, mean_absolute_error: 0.459860, mean_q: -0.542054
  583863/5000000: episode: 9738, duration: 1.878s, episode steps: 35, steps per second: 19, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.543 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001521, mean_absolute_error: 0.458651, mean_q: -0.539692
  583902/5000000: episode: 9739, duration: 2.009s, episode steps: 39, steps per second: 19, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.564 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001355, mean_absolute_error: 0.460314, mean_q: -0.542523
  583993/5000000: episode: 9740, duration: 4.670s, episode steps: 91, steps per second: 19, episode reward: -1.000, me

  585679/5000000: episode: 9765, duration: 4.157s, episode steps: 83, steps per second: 20, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.614 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001506, mean_absolute_error: 0.457976, mean_q: -0.540038
  585705/5000000: episode: 9766, duration: 1.269s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.269 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001412, mean_absolute_error: 0.458359, mean_q: -0.540366
  585738/5000000: episode: 9767, duration: 1.619s, episode steps: 33, steps per second: 20, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.576 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.001379, mean_absolute_error: 0.458625, mean_q: -0.539626
  585779/5000000: episode: 9768, duration: 2.045s, episode steps: 41, steps per second: 20, episode reward: -1.000, me

  587420/5000000: episode: 9793, duration: 1.661s, episode steps: 34, steps per second: 20, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.294 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001510, mean_absolute_error: 0.453694, mean_q: -0.534071
  587538/5000000: episode: 9794, duration: 5.821s, episode steps: 118, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.551 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001403, mean_absolute_error: 0.453400, mean_q: -0.533537
  587661/5000000: episode: 9795, duration: 5.651s, episode steps: 123, steps per second: 22, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.285 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001409, mean_absolute_error: 0.453155, mean_q: -0.533928
  587714/5000000: episode: 9796, duration: 2.626s, episode steps: 53, steps per second: 20, episode reward: -1.000, 

  589053/5000000: episode: 9821, duration: 3.912s, episode steps: 54, steps per second: 14, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.741 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001362, mean_absolute_error: 0.452680, mean_q: -0.533249
  589111/5000000: episode: 9822, duration: 2.894s, episode steps: 58, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.483 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001428, mean_absolute_error: 0.450754, mean_q: -0.531100
  589141/5000000: episode: 9823, duration: 1.569s, episode steps: 30, steps per second: 19, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.633 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001532, mean_absolute_error: 0.452351, mean_q: -0.533156
  589221/5000000: episode: 9824, duration: 3.732s, episode steps: 80, steps per second: 21, episode reward: -1.000, me

  590555/5000000: episode: 9849, duration: 3.156s, episode steps: 66, steps per second: 21, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.576 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001380, mean_absolute_error: 0.449322, mean_q: -0.529611
  590616/5000000: episode: 9850, duration: 3.177s, episode steps: 61, steps per second: 19, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.590 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001450, mean_absolute_error: 0.447818, mean_q: -0.527071
  590643/5000000: episode: 9851, duration: 1.318s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.407 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001412, mean_absolute_error: 0.447513, mean_q: -0.526628
  590675/5000000: episode: 9852, duration: 1.531s, episode steps: 32, steps per second: 21, episode reward: -1.000, me

  592124/5000000: episode: 9877, duration: 1.471s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.103 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001485, mean_absolute_error: 0.449966, mean_q: -0.529779
  592152/5000000: episode: 9878, duration: 1.439s, episode steps: 28, steps per second: 19, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.071 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001341, mean_absolute_error: 0.449259, mean_q: -0.530026
  592227/5000000: episode: 9879, duration: 3.685s, episode steps: 75, steps per second: 20, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.427 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001431, mean_absolute_error: 0.449907, mean_q: -0.530535
  592252/5000000: episode: 9880, duration: 1.248s, episode steps: 25, steps per second: 20, episode reward: -1.000, me

  594110/5000000: episode: 9905, duration: 1.578s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.875 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001440, mean_absolute_error: 0.450876, mean_q: -0.531785
  594211/5000000: episode: 9906, duration: 4.817s, episode steps: 101, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.743 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001385, mean_absolute_error: 0.451127, mean_q: -0.532257
  594243/5000000: episode: 9907, duration: 1.595s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001428, mean_absolute_error: 0.451266, mean_q: -0.532896
  594339/5000000: episode: 9908, duration: 4.680s, episode steps: 96, steps per second: 21, episode reward: -1.000, m

  595816/5000000: episode: 9933, duration: 1.334s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 1.960 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001343, mean_absolute_error: 0.459766, mean_q: -0.543420
  595845/5000000: episode: 9934, duration: 1.416s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.448 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001314, mean_absolute_error: 0.457937, mean_q: -0.541383
  595872/5000000: episode: 9935, duration: 1.321s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.630 [1.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001357, mean_absolute_error: 0.459706, mean_q: -0.543282
  596028/5000000: episode: 9936, duration: 7.494s, episode steps: 156, steps per second: 21, episode reward: -1.000, m

  597997/5000000: episode: 9961, duration: 2.726s, episode steps: 54, steps per second: 20, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.593 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001359, mean_absolute_error: 0.465747, mean_q: -0.550033
  598023/5000000: episode: 9962, duration: 1.323s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.423 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001289, mean_absolute_error: 0.466124, mean_q: -0.550361
  598064/5000000: episode: 9963, duration: 2.071s, episode steps: 41, steps per second: 20, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.585 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001268, mean_absolute_error: 0.465858, mean_q: -0.550256
  598211/5000000: episode: 9964, duration: 7.300s, episode steps: 147, steps per second: 20, episode reward: -1.000, m

  600072/5000000: episode: 9989, duration: 2.566s, episode steps: 53, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.340 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001302, mean_absolute_error: 0.464242, mean_q: -0.548555
  600166/5000000: episode: 9990, duration: 4.462s, episode steps: 94, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.649 [0.000, 5.000], mean observation: 0.076 [0.000, 24.000], loss: 0.001307, mean_absolute_error: 0.464654, mean_q: -0.549243
  600192/5000000: episode: 9991, duration: 1.202s, episode steps: 26, steps per second: 22, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.038 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.001359, mean_absolute_error: 0.466446, mean_q: -0.550424
  600217/5000000: episode: 9992, duration: 1.313s, episode steps: 25, steps per second: 19, episode reward: -1.000, me

  601944/5000000: episode: 10017, duration: 3.109s, episode steps: 63, steps per second: 20, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.556 [0.000, 5.000], mean observation: 0.063 [0.000, 24.000], loss: 0.001335, mean_absolute_error: 0.467018, mean_q: -0.552202
  602056/5000000: episode: 10018, duration: 5.298s, episode steps: 112, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.366 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001303, mean_absolute_error: 0.467927, mean_q: -0.553140
  602085/5000000: episode: 10019, duration: 1.618s, episode steps: 29, steps per second: 18, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.276 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001368, mean_absolute_error: 0.467087, mean_q: -0.552300
  602208/5000000: episode: 10020, duration: 6.052s, episode steps: 123, steps per second: 20, episode reward: -1.0

  603564/5000000: episode: 10045, duration: 5.009s, episode steps: 105, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.210 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.001324, mean_absolute_error: 0.467043, mean_q: -0.551743
  603596/5000000: episode: 10046, duration: 1.556s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.719 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001357, mean_absolute_error: 0.467641, mean_q: -0.552864
  603626/5000000: episode: 10047, duration: 1.455s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.933 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001344, mean_absolute_error: 0.467657, mean_q: -0.552098
  603658/5000000: episode: 10048, duration: 1.658s, episode steps: 32, steps per second: 19, episode reward: -1.00

  605488/5000000: episode: 10073, duration: 9.158s, episode steps: 185, steps per second: 20, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.632 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.001280, mean_absolute_error: 0.467914, mean_q: -0.552696
  605568/5000000: episode: 10074, duration: 3.918s, episode steps: 80, steps per second: 20, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.562 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001299, mean_absolute_error: 0.468366, mean_q: -0.553202
  605594/5000000: episode: 10075, duration: 1.264s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001269, mean_absolute_error: 0.467508, mean_q: -0.551844
  605622/5000000: episode: 10076, duration: 1.325s, episode steps: 28, steps per second: 21, episode reward: -1.00

  607062/5000000: episode: 10101, duration: 1.396s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.778 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001238, mean_absolute_error: 0.467727, mean_q: -0.552693
  607089/5000000: episode: 10102, duration: 1.435s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.778 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001248, mean_absolute_error: 0.468135, mean_q: -0.553643
  607115/5000000: episode: 10103, duration: 1.450s, episode steps: 26, steps per second: 18, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.308 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001238, mean_absolute_error: 0.469748, mean_q: -0.555197
  607143/5000000: episode: 10104, duration: 1.427s, episode steps: 28, steps per second: 20, episode reward: -1.000

  608460/5000000: episode: 10129, duration: 3.043s, episode steps: 63, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.619 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001289, mean_absolute_error: 0.465715, mean_q: -0.550261
  608603/5000000: episode: 10130, duration: 6.849s, episode steps: 143, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.601 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.001288, mean_absolute_error: 0.465738, mean_q: -0.549546
  608630/5000000: episode: 10131, duration: 1.437s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001304, mean_absolute_error: 0.463686, mean_q: -0.547055
  608733/5000000: episode: 10132, duration: 5.134s, episode steps: 103, steps per second: 20, episode reward: -1.0

  610318/5000000: episode: 10157, duration: 2.282s, episode steps: 46, steps per second: 20, episode reward: -1.000, mean reward: -0.022 [-1.000, 0.000], mean action: 2.717 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001251, mean_absolute_error: 0.464103, mean_q: -0.547578
  610346/5000000: episode: 10158, duration: 1.435s, episode steps: 28, steps per second: 20, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.821 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001327, mean_absolute_error: 0.463456, mean_q: -0.546834
  610374/5000000: episode: 10159, duration: 1.451s, episode steps: 28, steps per second: 19, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.786 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001290, mean_absolute_error: 0.463183, mean_q: -0.547173
  610405/5000000: episode: 10160, duration: 1.544s, episode steps: 31, steps per second: 20, episode reward: -1.000

  612014/5000000: episode: 10185, duration: 2.014s, episode steps: 41, steps per second: 20, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.293 [0.000, 5.000], mean observation: 0.062 [0.000, 24.000], loss: 0.001258, mean_absolute_error: 0.464730, mean_q: -0.548543
  612084/5000000: episode: 10186, duration: 3.530s, episode steps: 70, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.614 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001319, mean_absolute_error: 0.463507, mean_q: -0.546889
  612167/5000000: episode: 10187, duration: 4.275s, episode steps: 83, steps per second: 19, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.566 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.001252, mean_absolute_error: 0.463285, mean_q: -0.546730
  612199/5000000: episode: 10188, duration: 1.604s, episode steps: 32, steps per second: 20, episode reward: -1.000

  613715/5000000: episode: 10213, duration: 1.503s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.516 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001399, mean_absolute_error: 0.458899, mean_q: -0.539563
  613742/5000000: episode: 10214, duration: 1.356s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001398, mean_absolute_error: 0.459285, mean_q: -0.540723
  613768/5000000: episode: 10215, duration: 1.462s, episode steps: 26, steps per second: 18, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.731 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.001427, mean_absolute_error: 0.458987, mean_q: -0.540425
  613914/5000000: episode: 10216, duration: 7.243s, episode steps: 146, steps per second: 20, episode reward: -1.00

  615348/5000000: episode: 10241, duration: 1.514s, episode steps: 29, steps per second: 19, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.207 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.001727, mean_absolute_error: 0.450076, mean_q: -0.528239
  615374/5000000: episode: 10242, duration: 1.251s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.308 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.001495, mean_absolute_error: 0.451319, mean_q: -0.528885
  615427/5000000: episode: 10243, duration: 2.535s, episode steps: 53, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.585 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.001505, mean_absolute_error: 0.452005, mean_q: -0.529220
  615458/5000000: episode: 10244, duration: 1.520s, episode steps: 31, steps per second: 20, episode reward: -1.000

  616896/5000000: episode: 10269, duration: 1.175s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.002479, mean_absolute_error: 0.446315, mean_q: -0.518388
  616928/5000000: episode: 10270, duration: 1.709s, episode steps: 32, steps per second: 19, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.656 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.002649, mean_absolute_error: 0.443495, mean_q: -0.517908
  616958/5000000: episode: 10271, duration: 1.454s, episode steps: 30, steps per second: 21, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.133 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.003135, mean_absolute_error: 0.441830, mean_q: -0.514762
  617047/5000000: episode: 10272, duration: 4.298s, episode steps: 89, steps per second: 21, episode reward: -1.000

  618376/5000000: episode: 10297, duration: 3.957s, episode steps: 76, steps per second: 19, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.303 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.004645, mean_absolute_error: 0.433103, mean_q: -0.496612
  618408/5000000: episode: 10298, duration: 1.544s, episode steps: 32, steps per second: 21, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.969 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.004467, mean_absolute_error: 0.432292, mean_q: -0.496869
  618611/5000000: episode: 10299, duration: 9.829s, episode steps: 203, steps per second: 21, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.350 [0.000, 5.000], mean observation: 0.060 [0.000, 24.000], loss: 0.006410, mean_absolute_error: 0.432380, mean_q: -0.493008
  618982/5000000: episode: 10300, duration: 16.962s, episode steps: 371, steps per second: 22, episode reward: -1.

  620316/5000000: episode: 10325, duration: 2.992s, episode steps: 61, steps per second: 20, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.590 [0.000, 5.000], mean observation: 0.073 [0.000, 24.000], loss: 0.012757, mean_absolute_error: 0.404362, mean_q: -0.420309
  620341/5000000: episode: 10326, duration: 1.152s, episode steps: 25, steps per second: 22, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.280 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.014465, mean_absolute_error: 0.403670, mean_q: -0.418477
  620402/5000000: episode: 10327, duration: 2.971s, episode steps: 61, steps per second: 21, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.705 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.025756, mean_absolute_error: 0.406206, mean_q: -0.411379
  620458/5000000: episode: 10328, duration: 2.678s, episode steps: 56, steps per second: 21, episode reward: -1.000

  622352/5000000: episode: 10353, duration: 9.914s, episode steps: 207, steps per second: 21, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.536 [0.000, 5.000], mean observation: 0.060 [0.000, 24.000], loss: 0.083113, mean_absolute_error: 0.403017, mean_q: -0.230255
  622377/5000000: episode: 10354, duration: 1.202s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.110603, mean_absolute_error: 0.411195, mean_q: -0.208738
  622516/5000000: episode: 10355, duration: 6.834s, episode steps: 139, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.345 [0.000, 5.000], mean observation: 0.059 [0.000, 24.000], loss: 0.083259, mean_absolute_error: 0.403005, mean_q: -0.203786
  622662/5000000: episode: 10356, duration: 7.157s, episode steps: 146, steps per second: 20, episode reward: -1.

  623924/5000000: episode: 10381, duration: 1.487s, episode steps: 30, steps per second: 20, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.433 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.226546, mean_absolute_error: 0.604493, mean_q: 0.173474
  623951/5000000: episode: 10382, duration: 1.397s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.296 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.145853, mean_absolute_error: 0.571090, mean_q: 0.162226
  624064/5000000: episode: 10383, duration: 5.474s, episode steps: 113, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.637 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.216010, mean_absolute_error: 0.581786, mean_q: 0.184942
  624091/5000000: episode: 10384, duration: 1.409s, episode steps: 27, steps per second: 19, episode reward: -1.000, 

  625570/5000000: episode: 10409, duration: 1.318s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.269 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.348833, mean_absolute_error: 1.034942, mean_q: 0.796853
  625620/5000000: episode: 10410, duration: 2.548s, episode steps: 50, steps per second: 20, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.280122, mean_absolute_error: 1.017026, mean_q: 0.753801
  625647/5000000: episode: 10411, duration: 1.318s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.778 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.352497, mean_absolute_error: 1.041052, mean_q: 0.790955
  625681/5000000: episode: 10412, duration: 1.769s, episode steps: 34, steps per second: 19, episode reward: -1.000, m

  627066/5000000: episode: 10437, duration: 5.941s, episode steps: 117, steps per second: 20, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.308 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.407791, mean_absolute_error: 1.537052, mean_q: 1.375441
  627151/5000000: episode: 10438, duration: 4.374s, episode steps: 85, steps per second: 19, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.576 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.358486, mean_absolute_error: 1.615493, mean_q: 1.454267
  627245/5000000: episode: 10439, duration: 4.374s, episode steps: 94, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.415 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.378751, mean_absolute_error: 1.641249, mean_q: 1.469235
  627304/5000000: episode: 10440, duration: 2.842s, episode steps: 59, steps per second: 21, episode reward: -1.000, 

  628808/5000000: episode: 10465, duration: 3.220s, episode steps: 65, steps per second: 20, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.431 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.328964, mean_absolute_error: 2.160022, mean_q: 2.108932
  628838/5000000: episode: 10466, duration: 1.498s, episode steps: 30, steps per second: 20, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 3.067 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.435418, mean_absolute_error: 2.219663, mean_q: 2.169984
  628864/5000000: episode: 10467, duration: 1.369s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.115 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.392494, mean_absolute_error: 2.208317, mean_q: 2.149091
  629035/5000000: episode: 10468, duration: 8.463s, episode steps: 171, steps per second: 20, episode reward: -1.000, 

  630987/5000000: episode: 10493, duration: 1.362s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 1.929 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.455707, mean_absolute_error: 2.692189, mean_q: 2.803509
  631100/5000000: episode: 10494, duration: 5.658s, episode steps: 113, steps per second: 20, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.522 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.455932, mean_absolute_error: 2.810312, mean_q: 2.937440
  631125/5000000: episode: 10495, duration: 1.289s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.520 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.355306, mean_absolute_error: 2.728302, mean_q: 2.860356
  631152/5000000: episode: 10496, duration: 1.301s, episode steps: 27, steps per second: 21, episode reward: -1.000, 

  632583/5000000: episode: 10521, duration: 3.189s, episode steps: 61, steps per second: 19, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.459 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.318045, mean_absolute_error: 2.879878, mean_q: 3.100746
  632608/5000000: episode: 10522, duration: 1.167s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.920 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.314807, mean_absolute_error: 2.810170, mean_q: 3.044938
  632665/5000000: episode: 10523, duration: 2.785s, episode steps: 57, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.561 [0.000, 5.000], mean observation: 0.075 [0.000, 24.000], loss: 0.385137, mean_absolute_error: 2.931736, mean_q: 3.144412
  632730/5000000: episode: 10524, duration: 3.162s, episode steps: 65, steps per second: 21, episode reward: -1.000, m

  634122/5000000: episode: 10549, duration: 7.569s, episode steps: 151, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.444 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.309562, mean_absolute_error: 2.972423, mean_q: 3.268475
  634188/5000000: episode: 10550, duration: 3.238s, episode steps: 66, steps per second: 20, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.879 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.281358, mean_absolute_error: 2.969186, mean_q: 3.262087
  634217/5000000: episode: 10551, duration: 1.455s, episode steps: 29, steps per second: 20, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.724 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.270014, mean_absolute_error: 3.088656, mean_q: 3.375354
  634254/5000000: episode: 10552, duration: 1.843s, episode steps: 37, steps per second: 20, episode reward: -1.000, 

  635754/5000000: episode: 10577, duration: 1.293s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.240 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.240526, mean_absolute_error: 2.968284, mean_q: 3.307130
  635823/5000000: episode: 10578, duration: 3.390s, episode steps: 69, steps per second: 20, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.623 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.245765, mean_absolute_error: 2.995780, mean_q: 3.335030
  635849/5000000: episode: 10579, duration: 1.257s, episode steps: 26, steps per second: 21, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.615 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.273007, mean_absolute_error: 2.889066, mean_q: 3.233621
  635876/5000000: episode: 10580, duration: 1.295s, episode steps: 27, steps per second: 21, episode reward: -1.000, m

  637758/5000000: episode: 10605, duration: 4.502s, episode steps: 93, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.301 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.217107, mean_absolute_error: 2.872217, mean_q: 3.237402
  637784/5000000: episode: 10606, duration: 1.305s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.212947, mean_absolute_error: 2.805430, mean_q: 3.176298
  637830/5000000: episode: 10607, duration: 2.342s, episode steps: 46, steps per second: 20, episode reward: -1.000, mean reward: -0.022 [-1.000, 0.000], mean action: 2.435 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.243692, mean_absolute_error: 2.922901, mean_q: 3.291145
  637883/5000000: episode: 10608, duration: 2.555s, episode steps: 53, steps per second: 21, episode reward: -1.000, m

  639605/5000000: episode: 10633, duration: 2.077s, episode steps: 42, steps per second: 20, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.381 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.197914, mean_absolute_error: 2.744319, mean_q: 3.095983
  639630/5000000: episode: 10634, duration: 1.284s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.174468, mean_absolute_error: 2.771762, mean_q: 3.117184
  639656/5000000: episode: 10635, duration: 1.343s, episode steps: 26, steps per second: 19, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.205846, mean_absolute_error: 2.711317, mean_q: 3.066026
  639720/5000000: episode: 10636, duration: 3.217s, episode steps: 64, steps per second: 20, episode reward: -1.000, m

  640798/5000000: episode: 10661, duration: 1.265s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.173413, mean_absolute_error: 2.624310, mean_q: 2.963611
  640823/5000000: episode: 10662, duration: 1.241s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.520 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.162756, mean_absolute_error: 2.568201, mean_q: 2.905936
  640930/5000000: episode: 10663, duration: 5.122s, episode steps: 107, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.458 [0.000, 5.000], mean observation: 0.064 [0.000, 24.000], loss: 0.163605, mean_absolute_error: 2.631108, mean_q: 2.957230
  641214/5000000: episode: 10664, duration: 13.872s, episode steps: 284, steps per second: 20, episode reward: -1.000

  642671/5000000: episode: 10689, duration: 1.344s, episode steps: 27, steps per second: 20, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.519 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.131059, mean_absolute_error: 2.340047, mean_q: 2.603120
  642697/5000000: episode: 10690, duration: 1.324s, episode steps: 26, steps per second: 20, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.769 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.136018, mean_absolute_error: 2.406272, mean_q: 2.687461
  642729/5000000: episode: 10691, duration: 1.623s, episode steps: 32, steps per second: 20, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.344 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.141780, mean_absolute_error: 2.423282, mean_q: 2.698298
  642813/5000000: episode: 10692, duration: 3.970s, episode steps: 84, steps per second: 21, episode reward: -1.000, m

  645010/5000000: episode: 10717, duration: 2.837s, episode steps: 56, steps per second: 20, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.268 [0.000, 5.000], mean observation: 0.067 [0.000, 24.000], loss: 0.119879, mean_absolute_error: 2.170524, mean_q: 2.351232
  645134/5000000: episode: 10718, duration: 6.238s, episode steps: 124, steps per second: 20, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.597 [0.000, 5.000], mean observation: 0.074 [0.000, 24.000], loss: 0.117061, mean_absolute_error: 2.168989, mean_q: 2.358374
  645159/5000000: episode: 10719, duration: 1.215s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.107482, mean_absolute_error: 2.174351, mean_q: 2.368747
  645221/5000000: episode: 10720, duration: 3.090s, episode steps: 62, steps per second: 20, episode reward: -1.000, 

  646832/5000000: episode: 10745, duration: 4.403s, episode steps: 92, steps per second: 21, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.120 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.091378, mean_absolute_error: 1.988463, mean_q: 2.156819
  646993/5000000: episode: 10746, duration: 8.026s, episode steps: 161, steps per second: 20, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.466 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.093829, mean_absolute_error: 1.919703, mean_q: 2.080049
  647018/5000000: episode: 10747, duration: 1.295s, episode steps: 25, steps per second: 19, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.100229, mean_absolute_error: 1.911463, mean_q: 2.071126
  647043/5000000: episode: 10748, duration: 1.177s, episode steps: 25, steps per second: 21, episode reward: -1.000, 

  648624/5000000: episode: 10773, duration: 4.406s, episode steps: 95, steps per second: 22, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 1.989 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.075140, mean_absolute_error: 1.762720, mean_q: 1.886485
  648678/5000000: episode: 10774, duration: 2.592s, episode steps: 54, steps per second: 21, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.444 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.080194, mean_absolute_error: 1.756288, mean_q: 1.885892
  648824/5000000: episode: 10775, duration: 7.092s, episode steps: 146, steps per second: 21, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.582 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.074150, mean_absolute_error: 1.701934, mean_q: 1.826194
  648895/5000000: episode: 10776, duration: 3.174s, episode steps: 71, steps per second: 22, episode reward: -1.000, 

  650344/5000000: episode: 10801, duration: 1.462s, episode steps: 31, steps per second: 21, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.742 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.048810, mean_absolute_error: 1.261987, mean_q: 1.341855
  650518/5000000: episode: 10802, duration: 8.313s, episode steps: 174, steps per second: 21, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.368 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.055424, mean_absolute_error: 1.256890, mean_q: 1.329100
  650578/5000000: episode: 10803, duration: 2.854s, episode steps: 60, steps per second: 21, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.517 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.056929, mean_absolute_error: 1.235108, mean_q: 1.300359
  650603/5000000: episode: 10804, duration: 1.223s, episode steps: 25, steps per second: 20, episode reward: -1.000, 

  651988/5000000: episode: 10829, duration: 2.246s, episode steps: 46, steps per second: 20, episode reward: -1.000, mean reward: -0.022 [-1.000, 0.000], mean action: 2.565 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.043191, mean_absolute_error: 1.130198, mean_q: 1.139900
  652034/5000000: episode: 10830, duration: 2.350s, episode steps: 46, steps per second: 20, episode reward: -1.000, mean reward: -0.022 [-1.000, 0.000], mean action: 2.239 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.048879, mean_absolute_error: 1.125333, mean_q: 1.139581
  652092/5000000: episode: 10831, duration: 2.840s, episode steps: 58, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.345 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.045339, mean_absolute_error: 1.123813, mean_q: 1.139830
  652120/5000000: episode: 10832, duration: 1.232s, episode steps: 28, steps per second: 23, episode reward: -1.000, m

  653517/5000000: episode: 10857, duration: 1.571s, episode steps: 33, steps per second: 21, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.182 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.039558, mean_absolute_error: 1.030147, mean_q: 1.008498
  653600/5000000: episode: 10858, duration: 3.997s, episode steps: 83, steps per second: 21, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.398 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.040488, mean_absolute_error: 1.031117, mean_q: 1.010294
  653635/5000000: episode: 10859, duration: 1.686s, episode steps: 35, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.486 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.041685, mean_absolute_error: 1.026301, mean_q: 0.998221
  653693/5000000: episode: 10860, duration: 2.719s, episode steps: 58, steps per second: 21, episode reward: -1.000, m

  654842/5000000: episode: 10885, duration: 3.187s, episode steps: 67, steps per second: 21, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.478 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.037526, mean_absolute_error: 0.962038, mean_q: 0.917587
  654871/5000000: episode: 10886, duration: 1.363s, episode steps: 29, steps per second: 21, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.483 [0.000, 5.000], mean observation: 0.068 [0.000, 24.000], loss: 0.035101, mean_absolute_error: 0.954054, mean_q: 0.906326
  654896/5000000: episode: 10887, duration: 1.181s, episode steps: 25, steps per second: 21, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.039322, mean_absolute_error: 0.956005, mean_q: 0.910992
  654949/5000000: episode: 10888, duration: 2.513s, episode steps: 53, steps per second: 21, episode reward: -1.000, m

  656581/5000000: episode: 10913, duration: 2.949s, episode steps: 59, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.831 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.032692, mean_absolute_error: 0.874402, mean_q: 0.785510
  656608/5000000: episode: 10914, duration: 1.401s, episode steps: 27, steps per second: 19, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 1.852 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.031449, mean_absolute_error: 0.885420, mean_q: 0.778844
  656722/5000000: episode: 10915, duration: 5.372s, episode steps: 114, steps per second: 21, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.605 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.031627, mean_absolute_error: 0.875628, mean_q: 0.775118
  656754/5000000: episode: 10916, duration: 1.586s, episode steps: 32, steps per second: 20, episode reward: -1.000, 

  657968/5000000: episode: 10941, duration: 7.117s, episode steps: 140, steps per second: 20, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.593 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.028312, mean_absolute_error: 0.828851, mean_q: 0.706918
  658003/5000000: episode: 10942, duration: 1.695s, episode steps: 35, steps per second: 21, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.457 [0.000, 5.000], mean observation: 0.065 [0.000, 24.000], loss: 0.029997, mean_absolute_error: 0.835643, mean_q: 0.694828
  658128/5000000: episode: 10943, duration: 5.882s, episode steps: 125, steps per second: 21, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.456 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.028174, mean_absolute_error: 0.828727, mean_q: 0.702303
  658153/5000000: episode: 10944, duration: 1.201s, episode steps: 25, steps per second: 21, episode reward: -1.000,

  659559/5000000: episode: 10969, duration: 4.853s, episode steps: 102, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.706 [0.000, 5.000], mean observation: 0.071 [0.000, 24.000], loss: 0.026779, mean_absolute_error: 0.780810, mean_q: 0.636011
  659638/5000000: episode: 10970, duration: 3.424s, episode steps: 79, steps per second: 23, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.658 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.026693, mean_absolute_error: 0.781114, mean_q: 0.630455
  659696/5000000: episode: 10971, duration: 2.899s, episode steps: 58, steps per second: 20, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.569 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.026271, mean_absolute_error: 0.778446, mean_q: 0.636032
  659793/5000000: episode: 10972, duration: 4.968s, episode steps: 97, steps per second: 20, episode reward: -1.000, 

  661218/5000000: episode: 10997, duration: 1.843s, episode steps: 37, steps per second: 20, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.351 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.022172, mean_absolute_error: 0.739641, mean_q: 0.561902
  661245/5000000: episode: 10998, duration: 1.292s, episode steps: 27, steps per second: 21, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.111 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.021555, mean_absolute_error: 0.729872, mean_q: 0.556713
  661348/5000000: episode: 10999, duration: 4.947s, episode steps: 103, steps per second: 21, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.524 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.022541, mean_absolute_error: 0.725504, mean_q: 0.553397
  661373/5000000: episode: 11000, duration: 1.329s, episode steps: 25, steps per second: 19, episode reward: -1.000, 

  662551/5000000: episode: 11025, duration: 3.001s, episode steps: 57, steps per second: 19, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.491 [0.000, 5.000], mean observation: 0.069 [0.000, 24.000], loss: 0.022117, mean_absolute_error: 0.700569, mean_q: 0.510819
  662579/5000000: episode: 11026, duration: 1.343s, episode steps: 28, steps per second: 21, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.321 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.021739, mean_absolute_error: 0.709593, mean_q: 0.503329
  662840/5000000: episode: 11027, duration: 11.943s, episode steps: 261, steps per second: 22, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.464 [0.000, 5.000], mean observation: 0.060 [0.000, 24.000], loss: 0.021771, mean_absolute_error: 0.697419, mean_q: 0.494710
  662867/5000000: episode: 11028, duration: 1.306s, episode steps: 27, steps per second: 21, episode reward: -1.000,

  663975/5000000: episode: 11053, duration: 1.564s, episode steps: 31, steps per second: 20, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.581 [0.000, 5.000], mean observation: 0.070 [0.000, 24.000], loss: 0.022474, mean_absolute_error: 0.668428, mean_q: 0.451193
  664089/5000000: episode: 11054, duration: 5.562s, episode steps: 114, steps per second: 20, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.553 [0.000, 5.000], mean observation: 0.066 [0.000, 24.000], loss: 0.020968, mean_absolute_error: 0.669187, mean_q: 0.448186
  664114/5000000: episode: 11055, duration: 1.261s, episode steps: 25, steps per second: 20, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.760 [0.000, 5.000], mean observation: 0.072 [0.000, 24.000], loss: 0.022771, mean_absolute_error: 0.663457, mean_q: 0.449965
  664139/5000000: episode: 11056, duration: 1.191s, episode steps: 25, steps per second: 21, episode reward: -1.000, 

In [14]:
# Write the agent's current network weights to disk, replacing any existing file
# at the target path.
# NOTE(review): `model_path` is passed through unformatted; if it still holds the
# `{step}` checkpoint template from the config cell, the file on disk is literally
# named `model{step}.h4` -- confirm that is intended before relying on this file.
dqn.save_weights(filepath=model_path, overwrite=True)