In [1]:
import glob
import os
import warnings

import numpy as np
import tensorflow as tf
from keras.models import Model
from keras.layers import Dense, Flatten, Convolution2D, Input
from keras.optimizers import Adam
from pommerman.configs import ffa_v0_env
from pommerman.envs.v0 import Pomme
from pommerman.agents import SimpleAgent, BaseAgent
from pommerman.constants import BOARD_SIZE
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from rl.core import Env, Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint, Callback

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# --- Run configuration -------------------------------------------------------
# Output locations; all three live under the 'cnn_128_3_3_dense_128_1' tag.
file_log_path = './dqn/rl_logs/cnn_128_3_3_dense_128_1/log.txt'
tensorboard_path = './dqn/logs/cnn_128_3_3_dense_128_1/'
# '{step}' is a template placeholder, not a literal part of the file name.
# NOTE(review): the '.h4' extension looks like a typo for '.h5'/'.h5f'; it is
# left unchanged here so that already-written checkpoints keep their names.
model_path = './dqn/model/cnn_128_3_3_dense_128_1/model{step}.h4'

# Total number of training steps.
number_of_training_steps = 5 * 10 ** 6
# Interval, in steps, used for both file logging and checkpointing.
log_interval = 10 ** 3

In [3]:
# Make sure the directories for the log file and the checkpoints exist.
# `exist_ok=True` replaces the former check-then-create pattern, which could
# fail if the directory appeared between the check and the creation.
for output_path in (file_log_path, model_path):
    output_dir = os.path.dirname(output_path)
    if output_dir:  # a bare file name has no directory component to create
        os.makedirs(output_dir, exist_ok=True)

In [4]:
class TensorforceAgent(BaseAgent):
    """Placeholder agent whose `act` never selects an action itself."""

    def act(self, obs, action_space):
        """Ignore `obs` and `action_space` and return None."""
        return None


class TensorboardLogger(Callback):
    """Callback that logs per-episode statistics to TensorBoard.

    Scalars are written as `tf.Summary` protos through a
    `tf.summary.FileWriter`, i.e. without adding any tensorflow ops.
    """

    def __init__(self, log_dir):
        """Create the per-episode buffers and a summary writer logging to `log_dir`."""
        super(TensorboardLogger, self).__init__()
        # Some algorithms compute multiple episodes at once since they are multi-threaded.
        # We therefore use a dictionary that is indexed by the episode to separate episodes
        # from each other.
        self.observations = {}
        self.rewards = {}
        self.actions = {}
        self.metrics = {}
        # Total number of steps seen across all episodes.
        self.step = 0
        self.writer = tf.summary.FileWriter(log_dir)

    def log_scalar(self, tag, value, step):
        """Log a scalar variable.

        Parameter
        ----------
        tag : basestring
            Name of the scalar
        value
            Value to record under `tag`
        step : int
            training iteration
        """
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
        self.writer.add_summary(summary, step)

    def on_train_begin(self, logs):
        """Remember the metric names of the model being trained."""
        self.metrics_names = self.model.metrics_names

    def on_train_end(self, logs=None):
        """Flush pending summaries so the last episodes are written to disk."""
        self.writer.flush()

    def on_episode_begin(self, episode, logs):
        """Start empty buffers for `episode`."""
        self.observations[episode] = []
        self.rewards[episode] = []
        self.actions[episode] = []
        self.metrics[episode] = []

    def on_episode_end(self, episode, logs):
        """Log summary statistics of the finished episode and free its buffers.

        Every scalar is logged with `episode + 1` as its step value.
        """
        # Convert each buffer once instead of once per statistic.
        observations = np.asarray(self.observations[episode])
        rewards = np.asarray(self.rewards[episode])
        actions = np.asarray(self.actions[episode])
        variables = {
            'step': self.step,
            'nb_steps': self.params['nb_steps'],
            'episode_steps': len(self.observations[episode]),
            'episode_reward': np.sum(rewards),
            'reward_mean': np.mean(rewards),
            'reward_min': np.min(rewards),
            'reward_max': np.max(rewards),
            'action_mean': np.mean(actions),
            'action_min': np.min(actions),
            'action_max': np.max(actions),
            'obs_mean': np.mean(observations),
            'obs_min': np.min(observations),
            'obs_max': np.max(observations),
        }

        # Format all metrics.
        metrics = np.array(self.metrics[episode])
        with warnings.catch_warnings():
            # Turn warnings into exceptions so that a metric column for which
            # nanmean warns (e.g. all entries NaN) is reported as -1.
            warnings.filterwarnings('error')
            for idx, name in enumerate(self.metrics_names):
                try:
                    value = np.nanmean(metrics[:, idx])
                except Warning:
                    value = -1
                variables[name] = value
        for key, value in variables.items():
            self.log_scalar(key, value, episode + 1)

        # Free up resources.
        del self.observations[episode]
        del self.rewards[episode]
        del self.actions[episode]
        del self.metrics[episode]

    def on_step_end(self, step, logs):
        """Append this step's observation, reward, action and metrics to the episode buffers."""
        episode = logs['episode']
        self.observations[episode].append(logs['observation'])
        self.rewards[episode].append(logs['reward'])
        self.actions[episode].append(logs['action'])
        self.metrics[episode].append(logs['metrics'])
        self.step += 1

In [5]:
# Instantiate the environment from the `ffa_v0_env` configuration.
config = ffa_v0_env()
env = Pomme(**config["env_kwargs"])

# Seed numpy and the environment.
np.random.seed(0)
env.seed(0)

# Agents 0-2 are SimpleAgent opponents.
game_type = config["game_type"]
make_agent_spec = config["agent"]
agents = [SimpleAgent(make_agent_spec(opponent_id, game_type))
          for opponent_id in range(3)]

# The agent being trained takes the next free id (3).
agent_id = len(agents)
agents.append(TensorforceAgent(make_agent_spec(agent_id, game_type)))

env.set_agents(agents)
env.set_training_agent(agents[-1].agent_id)
env.set_init_game_state(None)

# Number of discrete actions, i.e. the size of the network's output layer.
nb_actions = env.action_space.n


def create_model(actions, input_shape=(13, 13, 17,)):
    """Build the convolutional network used as the Q-function.

    Three 3x3 convolutions with 128 filters each (ReLU) are followed by a
    flatten step, a 128-unit ReLU dense layer and a dense output layer
    without activation that has one unit per action.

    # Arguments
        actions (int): Number of units of the output layer.
        input_shape (tuple): Shape of a single input, without the batch axis.

    # Returns
        The (uncompiled) Keras `Model`.
    """
    network_input = Input(input_shape)

    features = network_input
    for _ in range(3):
        features = Convolution2D(128, 3, activation='relu')(features)

    features = Flatten()(features)
    features = Dense(128, activation='relu')(features)
    q_values = Dense(actions)(features)

    return Model(inputs=network_input, outputs=q_values)


# Next, we build a very simple model regardless of the dueling architecture.
# If you enable the dueling network in DQN, DQN will build a dueling network based on
# your model automatically. Alternatively, you can build a dueling network yourself
# and turn off the dueling network in DQN.
model = create_model(nb_actions)
# `summary()` prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 13, 13, 17)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 128)       19712     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 9, 9, 128)         147584    
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 128)         147584    
_________________________________________________________________
flatten_1 (Flatten)          (None, 6272)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               802944    
__________________________________

In [6]:
class EnvWrapper(Env):
    """Single-agent view of a multi-agent environment.

    The wrapped environment (`gym`) is asked for the actions of all other
    agents; the action passed to `step` is inserted at the training agent's
    index before the wrapped environment is stepped. Only the training agent's
    featurized observation and its reward are handed back to the caller.
    """
    reward_range = (-1, 1)
    action_space = None
    observation_space = None

    def __init__(self, gym, board_size):
        """Wrap `gym` and mirror its action space, observation space and reward range.

        # Arguments
            gym (object): The environment to wrap.
            board_size (int): Side length of the square board, used by `featurize`.
        """
        self.gym = gym
        self.action_space = gym.action_space
        self.observation_space = gym.observation_space
        self.reward_range = gym.reward_range
        self.board_size = board_size

    def step(self, action):
        """Run one timestep of the environment's dynamics.
        Accepts an action and returns a tuple (observation, reward, done, info).
        # Arguments
            action (object): The action chosen for the training agent.
        # Returns
            observation (object): Featurized observation of the training agent.
            reward (float) : Reward of the training agent for this step.
            done (boolean): Whether the episode has ended, in which case further step() calls will return undefined results.
            info (dict): Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        obs = self.gym.get_observations()
        # Actions of the other agents; the training agent's action is then
        # inserted at its own index.
        all_actions = self.gym.act(obs)
        all_actions.insert(self.gym.training_agent, action)
        state, reward, terminal, info = self.gym.step(all_actions)
        agent_state = self.featurize(state[self.gym.training_agent])
        agent_reward = reward[self.gym.training_agent]
        return agent_state, agent_reward, terminal, info

    def reset(self):
        """
        Resets the state of the environment and returns an initial observation.
        # Returns
            observation (object): Featurized initial observation of the training agent.
        """
        obs = self.gym.reset()
        agent_obs = self.featurize(obs[self.gym.training_agent])
        return agent_obs

    def render(self, mode='human', close=False):
        """Renders the wrapped environment.
        # Arguments
            mode (str): The mode to render with.
            close (bool): Close all open renderings.
        """
        self.gym.render(mode=mode, close=close)

    def close(self):
        """Close the wrapped environment."""
        self.gym.close()

    def seed(self, seed=None):
        """Sets the seed of the wrapped environment.
        # Returns
            Whatever the wrapped environment's `seed` returns.
        """
        # The original used `raise` here, which fails with a TypeError because
        # the result of `seed` is not an exception; the value must be returned.
        return self.gym.seed(seed)

    def configure(self, *args, **kwargs):
        """Runtime configuration is not supported by this wrapper.
        # Raises
            NotImplementedError: Always.
        """
        raise NotImplementedError()

    def featurize(self, obs):
        """Convert one agent's observation dict into a stack of feature planes.

        Every feature becomes one `(board_size, board_size, 1)` plane. The 17
        planes are concatenated along the last axis in this order: own
        position, enemies, teammate, the nine board item maps (values 1-9),
        bomb blast strength, bomb life, ammo, blast strength, can-kick.

        # Arguments
            obs (dict): Observation with the keys 'board', 'position',
                'teammate', 'enemies', 'bomb_blast_strength', 'bomb_life',
                'ammo', 'blast_strength' and 'can_kick'.
        # Returns
            Array of shape `(board_size, board_size, 17)`.
        """
        shape = (self.board_size, self.board_size, 1)

        def get_matrix(source, key):
            """Return `source[key]` reshaped to one float32 plane."""
            return source[key].reshape(shape).astype(np.float32)

        def get_map(board, item):
            """Return a plane that is 1 where `board` equals `item` and 0 elsewhere."""
            plane = np.zeros(shape)
            plane[board == item] = 1
            return plane

        board = get_matrix(obs, 'board')

        # TODO: probably not needed Passage = 0
        rigid_map = get_map(board, 1)  # Rigid = 1
        wood_map = get_map(board, 2)  # Wood = 2
        bomb_map = get_map(board, 3)  # Bomb = 3
        flames_map = get_map(board, 4)  # Flames = 4
        fog_map = get_map(board, 5)  # TODO: not used for first two stages Fog = 5
        extra_bomb_map = get_map(board, 6)  # ExtraBomb = 6
        incr_range_map = get_map(board, 7)  # IncrRange = 7
        kick_map = get_map(board, 8)  # Kick = 8
        skull_map = get_map(board, 9)  # Skull = 9

        position = obs["position"]
        my_position = np.zeros(shape)
        my_position[position[0], position[1], 0] = 1

        team_mates = get_map(board, obs["teammate"].value)  # TODO during documentation it should be an array

        # All enemies share a single plane.
        enemies = np.zeros(shape)
        for enemy in obs["enemies"]:
            enemies[board == enemy.value] = 1

        bomb_blast_strength = get_matrix(obs, 'bomb_blast_strength')
        bomb_life = get_matrix(obs, 'bomb_life')

        # Scalar attributes are broadcast to constant planes.
        ammo = np.full(shape, obs["ammo"])
        blast_strength = np.full(shape, obs["blast_strength"])
        can_kick = np.full(shape, int(obs["can_kick"]))

        obs = np.concatenate([my_position, enemies, team_mates, rigid_map,
                              wood_map, bomb_map, flames_map,
                              fog_map, extra_bomb_map, incr_range_map,
                              kick_map, skull_map, bomb_blast_strength,
                              bomb_life, ammo, blast_strength, can_kick], axis=2)
        return obs

    def __del__(self):
        """Close the wrapped environment when the wrapper is destroyed."""
        self.close()

    def __str__(self):
        return '<{} instance>'.format(type(self).__name__)


class CustomProcessor(Processor):
    """Processor adapting state batches and the info dict for the agent."""

    def process_state_batch(self, batch):
        """Processes an entire batch of states and returns it.

        Removes axis 1 of the batch, which must have length 1 (presumably the
        window axis, since the memory uses `window_length=1` - TODO confirm).

        # Arguments
            batch (list): List of states
        # Returns
            Processed list of states
        """
        return np.squeeze(batch, axis=1)

    def process_info(self, info):
        """Processes the info as obtained from the environment for use in an agent and
        returns it.

        `info['result']` is replaced by its `.value`. A shallow copy is
        returned, so the dict handed in by the caller is left untouched and
        can safely be processed again.
        """
        processed = dict(info)
        processed['result'] = info['result'].value
        return processed

In [7]:
env_wrapper = EnvWrapper(env, BOARD_SIZE)
processor = CustomProcessor()

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=500000, window_length=1)
policy = BoltzmannQPolicy()
file_logger = FileLogger(file_log_path, interval=log_interval)
checkpoint = ModelIntervalCheckpoint(model_path, interval=log_interval)
tensorboard = TensorboardLogger(tensorboard_path)
callbacks = [file_logger, checkpoint, tensorboard]
# enable the dueling network
# you can specify the dueling_type to one of {'avg','max','naive'}
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=32,
               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, policy=policy,
               processor=processor, batch_size=256)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Resume from the most recently written checkpoint, if there is one.
# `model_path` is a template containing the literal '{step}' placeholder, so
# testing it with os.path.isfile() could never succeed and the weights were
# never reloaded. Expand the placeholder to a wildcard and pick the newest match.
saved_checkpoints = glob.glob(model_path.format(step='*'))
if saved_checkpoints:
    latest_checkpoint = max(saved_checkpoints, key=os.path.getmtime)
    dqn.load_weights(latest_checkpoint)

In [8]:
# Train the agent and keep the result of `fit` for later inspection.
history = dqn.fit(
    env_wrapper,
    nb_steps=number_of_training_steps,
    visualize=False,
    verbose=2,
    # Limit every episode to the environment's own `_max_steps` value.
    nb_max_episode_steps=env._max_steps,
    callbacks=callbacks,
)

Training for 5000000 steps ...




     124/5000000: episode: 1, duration: 4.975s, episode steps: 124, steps per second: 25, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.605 [0.000, 5.000], mean observation: 0.271 [0.000, 24.000], loss: 0.019161, mean_absolute_error: 0.062530, mean_q: 0.072905
     160/5000000: episode: 2, duration: 0.964s, episode steps: 36, steps per second: 37, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.139 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 0.003029, mean_absolute_error: 0.014267, mean_q: 0.018118
     191/5000000: episode: 3, duration: 0.961s, episode steps: 31, steps per second: 32, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.484 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.005703, mean_absolute_error: 0.020971, mean_q: 0.019113
     221/5000000: episode: 4, duration: 0.915s, episode steps: 30, steps per second: 33, episode reward: -1.000, mean reward: -0.

    1696/5000000: episode: 30, duration: 1.219s, episode steps: 40, steps per second: 33, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.125 [0.000, 5.000], mean observation: 0.257 [0.000, 24.000], loss: 0.002195, mean_absolute_error: 0.141790, mean_q: 0.132113
    1721/5000000: episode: 31, duration: 0.797s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.520 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001858, mean_absolute_error: 0.133113, mean_q: 0.142925
    1807/5000000: episode: 32, duration: 2.571s, episode steps: 86, steps per second: 33, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.686 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001975, mean_absolute_error: 0.143641, mean_q: 0.146485
    1832/5000000: episode: 33, duration: 0.796s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: 

    3199/5000000: episode: 59, duration: 3.113s, episode steps: 106, steps per second: 34, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.651 [0.000, 5.000], mean observation: 0.264 [0.000, 24.000], loss: 0.002200, mean_absolute_error: 0.270874, mean_q: 0.193236
    3228/5000000: episode: 60, duration: 0.834s, episode steps: 29, steps per second: 35, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.517 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.002161, mean_absolute_error: 0.273611, mean_q: 0.210320
    3284/5000000: episode: 61, duration: 1.769s, episode steps: 56, steps per second: 32, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.696 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.002527, mean_absolute_error: 0.284790, mean_q: 0.190271
    3311/5000000: episode: 62, duration: 0.931s, episode steps: 27, steps per second: 29, episode reward: -1.000, mean reward:

    4684/5000000: episode: 88, duration: 1.137s, episode steps: 34, steps per second: 30, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.235 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.003276, mean_absolute_error: 0.373371, mean_q: 0.223338
    4750/5000000: episode: 89, duration: 2.103s, episode steps: 66, steps per second: 31, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.364 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.003150, mean_absolute_error: 0.377917, mean_q: 0.229583
    4843/5000000: episode: 90, duration: 2.566s, episode steps: 93, steps per second: 36, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.441 [0.000, 5.000], mean observation: 0.273 [0.000, 24.000], loss: 0.003292, mean_absolute_error: 0.389600, mean_q: 0.226574
    4870/5000000: episode: 91, duration: 0.841s, episode steps: 27, steps per second: 32, episode reward: -1.000, mean reward: 

    7013/5000000: episode: 117, duration: 5.323s, episode steps: 160, steps per second: 30, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.156 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 0.008658, mean_absolute_error: 0.635840, mean_q: 0.553955
    7100/5000000: episode: 118, duration: 2.552s, episode steps: 87, steps per second: 34, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.632 [0.000, 5.000], mean observation: 0.270 [0.000, 24.000], loss: 0.011455, mean_absolute_error: 0.639172, mean_q: 0.556034
    7130/5000000: episode: 119, duration: 0.871s, episode steps: 30, steps per second: 34, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.200 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.007075, mean_absolute_error: 0.651334, mean_q: 0.578347
    7216/5000000: episode: 120, duration: 2.513s, episode steps: 86, steps per second: 34, episode reward: -1.000, mean rew

    9090/5000000: episode: 146, duration: 0.800s, episode steps: 26, steps per second: 33, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.808 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.022871, mean_absolute_error: 0.891993, mean_q: 0.931839
    9244/5000000: episode: 147, duration: 4.542s, episode steps: 154, steps per second: 34, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.487 [0.000, 5.000], mean observation: 0.297 [0.000, 24.000], loss: 0.012924, mean_absolute_error: 0.946025, mean_q: 0.971805
    9338/5000000: episode: 148, duration: 2.562s, episode steps: 94, steps per second: 37, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 1.947 [0.000, 5.000], mean observation: 0.272 [0.000, 24.000], loss: 0.013726, mean_absolute_error: 0.965220, mean_q: 1.001138
    9411/5000000: episode: 149, duration: 2.255s, episode steps: 73, steps per second: 32, episode reward: -1.000, mean rew

   10665/5000000: episode: 175, duration: 0.917s, episode steps: 30, steps per second: 33, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.038723, mean_absolute_error: 1.091958, mean_q: 1.169060
   10699/5000000: episode: 176, duration: 0.978s, episode steps: 34, steps per second: 35, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.147 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 0.024738, mean_absolute_error: 1.143651, mean_q: 1.207365
   10744/5000000: episode: 177, duration: 1.384s, episode steps: 45, steps per second: 33, episode reward: -1.000, mean reward: -0.022 [-1.000, 0.000], mean action: 2.200 [0.000, 5.000], mean observation: 0.260 [0.000, 24.000], loss: 0.016325, mean_absolute_error: 1.087919, mean_q: 1.138474
   10775/5000000: episode: 178, duration: 0.953s, episode steps: 31, steps per second: 33, episode reward: -1.000, mean rewa

   12017/5000000: episode: 204, duration: 0.872s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.885 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.030514, mean_absolute_error: 1.234821, mean_q: 1.308703
   12226/5000000: episode: 205, duration: 5.738s, episode steps: 209, steps per second: 36, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.249 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 0.036174, mean_absolute_error: 1.341842, mean_q: 1.437251
   12256/5000000: episode: 206, duration: 0.908s, episode steps: 30, steps per second: 33, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.032319, mean_absolute_error: 1.313245, mean_q: 1.431138
   12364/5000000: episode: 207, duration: 3.533s, episode steps: 108, steps per second: 31, episode reward: -1.000, mean re

   14052/5000000: episode: 233, duration: 2.382s, episode steps: 77, steps per second: 32, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.117 [0.000, 5.000], mean observation: 0.270 [0.000, 24.000], loss: 0.171753, mean_absolute_error: 1.849547, mean_q: 2.087283
   14077/5000000: episode: 234, duration: 0.798s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.080 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.115957, mean_absolute_error: 1.845084, mean_q: 2.122133
   14141/5000000: episode: 235, duration: 2.092s, episode steps: 64, steps per second: 31, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 1.984 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.070467, mean_absolute_error: 1.872392, mean_q: 2.139311
   14175/5000000: episode: 236, duration: 1.007s, episode steps: 34, steps per second: 34, episode reward: -1.000, mean rewa

   16139/5000000: episode: 262, duration: 3.448s, episode steps: 111, steps per second: 32, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.694 [0.000, 5.000], mean observation: 0.268 [0.000, 24.000], loss: 0.373824, mean_absolute_error: 2.464952, mean_q: 2.918965
   16177/5000000: episode: 263, duration: 1.228s, episode steps: 38, steps per second: 31, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.316 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 0.783535, mean_absolute_error: 2.532005, mean_q: 2.965835
   16406/5000000: episode: 264, duration: 7.235s, episode steps: 229, steps per second: 32, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.389 [0.000, 5.000], mean observation: 0.296 [0.000, 24.000], loss: 0.247022, mean_absolute_error: 2.811566, mean_q: 3.377690
   16432/5000000: episode: 265, duration: 0.864s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean re

   18242/5000000: episode: 291, duration: 1.149s, episode steps: 34, steps per second: 30, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.294 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.287898, mean_absolute_error: 3.625019, mean_q: 4.346249
   18275/5000000: episode: 292, duration: 1.070s, episode steps: 33, steps per second: 31, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.545 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.256115, mean_absolute_error: 3.298964, mean_q: 4.013709
   18411/5000000: episode: 293, duration: 4.267s, episode steps: 136, steps per second: 32, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.199 [0.000, 5.000], mean observation: 0.313 [0.000, 24.000], loss: 0.251869, mean_absolute_error: 3.552112, mean_q: 4.304165
   18504/5000000: episode: 294, duration: 2.737s, episode steps: 93, steps per second: 34, episode reward: -1.000, mean rew

   22282/5000000: episode: 320, duration: 4.356s, episode steps: 158, steps per second: 36, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.228 [0.000, 5.000], mean observation: 0.315 [0.000, 24.000], loss: 1.602040, mean_absolute_error: 5.989498, mean_q: 7.277184
   22442/5000000: episode: 321, duration: 4.740s, episode steps: 160, steps per second: 34, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.506 [0.000, 5.000], mean observation: 0.309 [0.000, 24.000], loss: 3.049371, mean_absolute_error: 6.438074, mean_q: 7.815755
   22512/5000000: episode: 322, duration: 2.110s, episode steps: 70, steps per second: 33, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.429 [0.000, 5.000], mean observation: 0.273 [0.000, 24.000], loss: 2.607267, mean_absolute_error: 6.551975, mean_q: 7.983460
   22589/5000000: episode: 323, duration: 1.917s, episode steps: 77, steps per second: 40, episode reward: -1.000, mean re

   25893/5000000: episode: 348, duration: 7.403s, episode steps: 262, steps per second: 35, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.557 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 4.344586, mean_absolute_error: 11.434114, mean_q: 14.229420
   25975/5000000: episode: 349, duration: 2.372s, episode steps: 82, steps per second: 35, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.256 [0.000, 5.000], mean observation: 0.268 [0.000, 24.000], loss: 2.168801, mean_absolute_error: 11.015734, mean_q: 13.758506
   26096/5000000: episode: 350, duration: 3.969s, episode steps: 121, steps per second: 30, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.736 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 2.037979, mean_absolute_error: 12.012156, mean_q: 14.921510
   26165/5000000: episode: 351, duration: 1.755s, episode steps: 69, steps per second: 39, episode reward: -1.000, m

   28521/5000000: episode: 376, duration: 3.982s, episode steps: 147, steps per second: 37, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.585 [0.000, 5.000], mean observation: 0.313 [0.000, 24.000], loss: 5.581753, mean_absolute_error: 15.866000, mean_q: 19.787130
   28546/5000000: episode: 377, duration: 0.778s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.760 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 8.342045, mean_absolute_error: 17.636896, mean_q: 21.817865
   28579/5000000: episode: 378, duration: 0.930s, episode steps: 33, steps per second: 35, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.424 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 10.188277, mean_absolute_error: 17.126854, mean_q: 21.362143
   28636/5000000: episode: 379, duration: 1.704s, episode steps: 57, steps per second: 33, episode reward: -1.000, m

   31185/5000000: episode: 404, duration: 9.510s, episode steps: 342, steps per second: 36, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.190 [0.000, 5.000], mean observation: 0.313 [0.000, 24.000], loss: 25.340752, mean_absolute_error: 22.886370, mean_q: 28.503170
   31244/5000000: episode: 405, duration: 1.632s, episode steps: 59, steps per second: 36, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.508 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 11.576795, mean_absolute_error: 24.068295, mean_q: 29.800552
   31356/5000000: episode: 406, duration: 3.094s, episode steps: 112, steps per second: 36, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 1.848 [0.000, 5.000], mean observation: 0.274 [0.000, 24.000], loss: 11.158919, mean_absolute_error: 23.412693, mean_q: 29.159382
   31477/5000000: episode: 407, duration: 3.127s, episode steps: 121, steps per second: 39, episode reward: -1.00

   33879/5000000: episode: 432, duration: 1.569s, episode steps: 57, steps per second: 36, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.263 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 15.482067, mean_absolute_error: 29.928177, mean_q: 37.157459
   33967/5000000: episode: 433, duration: 2.536s, episode steps: 88, steps per second: 35, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.659 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 10.587373, mean_absolute_error: 28.078852, mean_q: 34.996098
   34023/5000000: episode: 434, duration: 9.369s, episode steps: 56, steps per second: 6, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.357 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 25.596249, mean_absolute_error: 29.624655, mean_q: 36.856731
   34244/5000000: episode: 435, duration: 6.409s, episode steps: 221, steps per second: 34, episode reward: -1.000, 

   38861/5000000: episode: 460, duration: 1.284s, episode steps: 40, steps per second: 31, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.075 [0.000, 5.000], mean observation: 0.257 [0.000, 24.000], loss: 99.811935, mean_absolute_error: 57.117256, mean_q: 71.040405
   38915/5000000: episode: 461, duration: 1.606s, episode steps: 54, steps per second: 34, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.722 [0.000, 5.000], mean observation: 0.262 [0.000, 24.000], loss: 89.692467, mean_absolute_error: 55.479794, mean_q: 69.081749
   39003/5000000: episode: 462, duration: 2.977s, episode steps: 88, steps per second: 30, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.386 [0.000, 5.000], mean observation: 0.276 [0.000, 24.000], loss: 48.537331, mean_absolute_error: 52.826542, mean_q: 66.053291
   39036/5000000: episode: 463, duration: 0.885s, episode steps: 33, steps per second: 37, episode reward: -1.000, 

   41346/5000000: episode: 488, duration: 1.038s, episode steps: 33, steps per second: 32, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.030 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 81.752197, mean_absolute_error: 63.323250, mean_q: 79.527618
   41373/5000000: episode: 489, duration: 0.818s, episode steps: 27, steps per second: 33, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.778 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 162.871750, mean_absolute_error: 67.487221, mean_q: 82.564362
   41403/5000000: episode: 490, duration: 0.936s, episode steps: 30, steps per second: 32, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 3.467 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 94.539391, mean_absolute_error: 63.594646, mean_q: 78.906334
   41538/5000000: episode: 491, duration: 3.413s, episode steps: 135, steps per second: 40, episode reward: -1.000

   43937/5000000: episode: 516, duration: 3.369s, episode steps: 108, steps per second: 32, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.944 [0.000, 5.000], mean observation: 0.261 [0.000, 24.000], loss: 119.610550, mean_absolute_error: 75.674133, mean_q: 94.354294
   43999/5000000: episode: 517, duration: 1.941s, episode steps: 62, steps per second: 32, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 1.677 [0.000, 5.000], mean observation: 0.266 [0.000, 24.000], loss: 87.020042, mean_absolute_error: 74.230293, mean_q: 92.990509
   44068/5000000: episode: 518, duration: 1.985s, episode steps: 69, steps per second: 35, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.580 [0.000, 5.000], mean observation: 0.267 [0.000, 24.000], loss: 71.642815, mean_absolute_error: 73.374893, mean_q: 91.723595
   44220/5000000: episode: 519, duration: 4.918s, episode steps: 152, steps per second: 31, episode reward: -1.00

   49465/5000000: episode: 544, duration: 0.775s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.880 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 637.053345, mean_absolute_error: 113.318367, mean_q: 141.570709
   49527/5000000: episode: 545, duration: 1.784s, episode steps: 62, steps per second: 35, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 1.710 [0.000, 5.000], mean observation: 0.269 [0.000, 24.000], loss: 428.000427, mean_absolute_error: 107.061272, mean_q: 133.590515
   49891/5000000: episode: 546, duration: 11.000s, episode steps: 364, steps per second: 33, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.536 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 563.274902, mean_absolute_error: 118.697929, mean_q: 147.917664
   49916/5000000: episode: 547, duration: 0.765s, episode steps: 25, steps per second: 33, episode rewar

   52284/5000000: episode: 572, duration: 1.874s, episode steps: 59, steps per second: 31, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 3.508 [1.000, 5.000], mean observation: 0.266 [0.000, 24.000], loss: 4538.505371, mean_absolute_error: 165.622391, mean_q: 205.771652
   52508/5000000: episode: 573, duration: 7.140s, episode steps: 224, steps per second: 31, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 1.344 [0.000, 5.000], mean observation: 0.280 [0.000, 24.000], loss: 1554.803345, mean_absolute_error: 171.634232, mean_q: 211.109940
   52732/5000000: episode: 574, duration: 6.157s, episode steps: 224, steps per second: 36, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.688 [0.000, 5.000], mean observation: 0.273 [0.000, 24.000], loss: 1043.996826, mean_absolute_error: 181.602325, mean_q: 223.938431
   52807/5000000: episode: 575, duration: 2.324s, episode steps: 75, steps per second: 32, episode re

   55954/5000000: episode: 600, duration: 3.103s, episode steps: 100, steps per second: 32, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.440 [0.000, 5.000], mean observation: 0.275 [0.000, 24.000], loss: 1031.156372, mean_absolute_error: 211.285492, mean_q: 262.065735
   56037/5000000: episode: 601, duration: 2.673s, episode steps: 83, steps per second: 31, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 1.108 [0.000, 5.000], mean observation: 0.270 [0.000, 24.000], loss: 1291.495483, mean_absolute_error: 208.751663, mean_q: 257.040985
   56090/5000000: episode: 602, duration: 1.591s, episode steps: 53, steps per second: 33, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.377 [0.000, 5.000], mean observation: 0.263 [0.000, 24.000], loss: 1229.518188, mean_absolute_error: 205.947281, mean_q: 255.850510
   56116/5000000: episode: 603, duration: 0.722s, episode steps: 26, steps per second: 36, episode rew

   59457/5000000: episode: 628, duration: 5.581s, episode steps: 183, steps per second: 33, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 1.683 [0.000, 5.000], mean observation: 0.273 [0.000, 24.000], loss: 3584.703125, mean_absolute_error: 245.201126, mean_q: 304.261749
   59483/5000000: episode: 629, duration: 0.824s, episode steps: 26, steps per second: 32, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.462 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 1362.445557, mean_absolute_error: 248.475403, mean_q: 311.704498
   59508/5000000: episode: 630, duration: 0.752s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.720 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 1120.711914, mean_absolute_error: 239.101898, mean_q: 301.136658
   59540/5000000: episode: 631, duration: 0.957s, episode steps: 32, steps per second: 33, episode rew

   62691/5000000: episode: 656, duration: 0.811s, episode steps: 28, steps per second: 35, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.750 [1.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 1640.396851, mean_absolute_error: 299.854156, mean_q: 375.511047
   63077/5000000: episode: 657, duration: 9.080s, episode steps: 386, steps per second: 43, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.777 [0.000, 5.000], mean observation: 0.300 [0.000, 24.000], loss: 3234.802246, mean_absolute_error: 288.763489, mean_q: 359.433167
   63255/5000000: episode: 658, duration: 5.746s, episode steps: 178, steps per second: 31, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.331 [0.000, 5.000], mean observation: 0.264 [0.000, 24.000], loss: 2390.983154, mean_absolute_error: 280.628906, mean_q: 353.785645
   63580/5000000: episode: 659, duration: 8.818s, episode steps: 325, steps per second: 37, episode r

   67632/5000000: episode: 684, duration: 2.299s, episode steps: 87, steps per second: 38, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.271 [0.000, 24.000], loss: 3361.576416, mean_absolute_error: 355.197998, mean_q: 442.589752
   67767/5000000: episode: 685, duration: 4.054s, episode steps: 135, steps per second: 33, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 3.022 [0.000, 5.000], mean observation: 0.266 [0.000, 24.000], loss: 2666.546387, mean_absolute_error: 364.185638, mean_q: 452.786346
   67877/5000000: episode: 686, duration: 3.277s, episode steps: 110, steps per second: 34, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 1.018 [0.000, 5.000], mean observation: 0.274 [0.000, 24.000], loss: 2308.160645, mean_absolute_error: 382.679779, mean_q: 476.325928
   67940/5000000: episode: 687, duration: 1.819s, episode steps: 63, steps per second: 35, episode re

   70047/5000000: episode: 712, duration: 1.448s, episode steps: 47, steps per second: 32, episode reward: -1.000, mean reward: -0.021 [-1.000, 0.000], mean action: 0.723 [0.000, 5.000], mean observation: 0.261 [0.000, 24.000], loss: 4429.743164, mean_absolute_error: 388.366730, mean_q: 481.364655
   70227/5000000: episode: 713, duration: 5.533s, episode steps: 180, steps per second: 33, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 1.706 [0.000, 5.000], mean observation: 0.273 [0.000, 24.000], loss: 7330.067383, mean_absolute_error: 414.404205, mean_q: 514.155396
   70252/5000000: episode: 714, duration: 0.740s, episode steps: 25, steps per second: 34, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 4.760 [2.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 3766.993164, mean_absolute_error: 360.888855, mean_q: 448.139648
   70415/5000000: episode: 715, duration: 4.944s, episode steps: 163, steps per second: 33, episode re

   71813/5000000: episode: 740, duration: 1.723s, episode steps: 57, steps per second: 33, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.509 [0.000, 5.000], mean observation: 0.263 [0.000, 24.000], loss: 23268.199219, mean_absolute_error: 579.613159, mean_q: 716.363098
   71838/5000000: episode: 741, duration: 0.807s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 4.880 [4.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 9292.808594, mean_absolute_error: 525.871704, mean_q: 656.850769
   71865/5000000: episode: 742, duration: 0.775s, episode steps: 27, steps per second: 35, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 4.852 [2.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 23426.447266, mean_absolute_error: 553.556458, mean_q: 678.599915
   71890/5000000: episode: 743, duration: 0.897s, episode steps: 25, steps per second: 28, episode re

   73743/5000000: episode: 768, duration: 0.928s, episode steps: 34, steps per second: 37, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 3.029 [1.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 22192.375000, mean_absolute_error: 633.700439, mean_q: 780.724426
   73942/5000000: episode: 769, duration: 5.712s, episode steps: 199, steps per second: 35, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 1.613 [0.000, 5.000], mean observation: 0.278 [0.000, 24.000], loss: 85002.703125, mean_absolute_error: 659.297607, mean_q: 789.024963
   74174/5000000: episode: 770, duration: 13.971s, episode steps: 232, steps per second: 17, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.866 [0.000, 5.000], mean observation: 0.277 [0.000, 24.000], loss: 33294.550781, mean_absolute_error: 693.701111, mean_q: 844.522095
   74200/5000000: episode: 771, duration: 0.808s, episode steps: 26, steps per second: 32, episod

   75849/5000000: episode: 796, duration: 4.509s, episode steps: 147, steps per second: 33, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 1.190 [0.000, 5.000], mean observation: 0.278 [0.000, 24.000], loss: 115133.703125, mean_absolute_error: 767.956299, mean_q: 910.280640
   75911/5000000: episode: 797, duration: 1.837s, episode steps: 62, steps per second: 34, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 1.597 [0.000, 5.000], mean observation: 0.266 [0.000, 24.000], loss: 74039.343750, mean_absolute_error: 898.604797, mean_q: 1088.606323
   76125/5000000: episode: 798, duration: 6.318s, episode steps: 214, steps per second: 34, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 1.561 [0.000, 5.000], mean observation: 0.278 [0.000, 24.000], loss: 71595.101562, mean_absolute_error: 852.239929, mean_q: 1017.537903
   76212/5000000: episode: 799, duration: 2.565s, episode steps: 87, steps per second: 34, epis

   78099/5000000: episode: 824, duration: 4.976s, episode steps: 164, steps per second: 33, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.537 [0.000, 5.000], mean observation: 0.274 [0.000, 24.000], loss: 41077.855469, mean_absolute_error: 1040.605713, mean_q: 1239.837036
   78210/5000000: episode: 825, duration: 3.297s, episode steps: 111, steps per second: 34, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 1.928 [0.000, 5.000], mean observation: 0.274 [0.000, 24.000], loss: 99452.359375, mean_absolute_error: 880.179321, mean_q: 1073.070557
   78242/5000000: episode: 826, duration: 1.011s, episode steps: 32, steps per second: 32, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 3.594 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 107800.335938, mean_absolute_error: 938.066284, mean_q: 1145.635742
   78409/5000000: episode: 827, duration: 4.892s, episode steps: 167, steps per second: 34, e

   80053/5000000: episode: 852, duration: 0.772s, episode steps: 26, steps per second: 34, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.154 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 132636.906250, mean_absolute_error: 1021.467712, mean_q: 1267.470947
   80082/5000000: episode: 853, duration: 0.914s, episode steps: 29, steps per second: 32, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 3.621 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 65152.835938, mean_absolute_error: 1104.208984, mean_q: 1350.967896
   80396/5000000: episode: 854, duration: 9.866s, episode steps: 314, steps per second: 32, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.264 [0.000, 5.000], mean observation: 0.351 [0.000, 24.000], loss: 116002.531250, mean_absolute_error: 1256.489380, mean_q: 1481.203857
   80472/5000000: episode: 855, duration: 2.320s, episode steps: 76, steps per second: 33, 

   82396/5000000: episode: 880, duration: 0.841s, episode steps: 28, steps per second: 33, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 49703.992188, mean_absolute_error: 1565.833618, mean_q: 1825.844727
   82447/5000000: episode: 881, duration: 1.552s, episode steps: 51, steps per second: 33, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.725 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 358770.562500, mean_absolute_error: 1602.121826, mean_q: 1910.155762
   82636/5000000: episode: 882, duration: 6.258s, episode steps: 189, steps per second: 30, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.815 [0.000, 5.000], mean observation: 0.273 [0.000, 24.000], loss: 424029.625000, mean_absolute_error: 1578.374390, mean_q: 1911.096191
   82688/5000000: episode: 883, duration: 1.701s, episode steps: 52, steps per second: 31, 

   84212/5000000: episode: 908, duration: 3.401s, episode steps: 141, steps per second: 41, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.177 [0.000, 5.000], mean observation: 0.269 [0.000, 24.000], loss: 1915205.000000, mean_absolute_error: 1650.722778, mean_q: 2026.629395
   84380/5000000: episode: 909, duration: 5.086s, episode steps: 168, steps per second: 33, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.506 [0.000, 5.000], mean observation: 0.269 [0.000, 24.000], loss: 170613.734375, mean_absolute_error: 1564.010376, mean_q: 1908.349365
   84405/5000000: episode: 910, duration: 0.798s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.680 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 115309.742188, mean_absolute_error: 1526.537598, mean_q: 1849.381592
   84437/5000000: episode: 911, duration: 0.969s, episode steps: 32, steps per second: 3

   85906/5000000: episode: 936, duration: 0.740s, episode steps: 25, steps per second: 34, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.320 [0.000, 5.000], mean observation: 0.230 [0.000, 24.000], loss: 76054.773438, mean_absolute_error: 1756.478149, mean_q: 2142.021973
   85938/5000000: episode: 937, duration: 0.908s, episode steps: 32, steps per second: 35, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.562 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 91206.015625, mean_absolute_error: 1579.264404, mean_q: 1983.136719
   85969/5000000: episode: 938, duration: 0.958s, episode steps: 31, steps per second: 32, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.516 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 333699.562500, mean_absolute_error: 1551.255371, mean_q: 1924.400269
   86021/5000000: episode: 939, duration: 1.624s, episode steps: 52, steps per second: 32, ep

   87430/5000000: episode: 964, duration: 0.958s, episode steps: 31, steps per second: 32, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 1.935 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 213476.125000, mean_absolute_error: 1980.112549, mean_q: 2433.100586
   87523/5000000: episode: 965, duration: 2.888s, episode steps: 93, steps per second: 32, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.688 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 152710.078125, mean_absolute_error: 1716.232910, mean_q: 2137.161377
   87688/5000000: episode: 966, duration: 5.278s, episode steps: 165, steps per second: 31, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.424 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 315283.531250, mean_absolute_error: 2025.645508, mean_q: 2414.812988
   87892/5000000: episode: 967, duration: 5.179s, episode steps: 204, steps per second: 39

   89532/5000000: episode: 992, duration: 0.746s, episode steps: 25, steps per second: 34, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.680 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 227776.484375, mean_absolute_error: 2115.100830, mean_q: 2524.245117
   89588/5000000: episode: 993, duration: 1.653s, episode steps: 56, steps per second: 34, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.554 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 404236.531250, mean_absolute_error: 2100.654785, mean_q: 2533.362793
   89642/5000000: episode: 994, duration: 1.668s, episode steps: 54, steps per second: 32, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.278 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 93692.109375, mean_absolute_error: 2017.367188, mean_q: 2475.974854
   89667/5000000: episode: 995, duration: 0.803s, episode steps: 25, steps per second: 31, e

   91151/5000000: episode: 1019, duration: 0.916s, episode steps: 30, steps per second: 33, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.267 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 263205.375000, mean_absolute_error: 2653.277100, mean_q: 3150.041992
   91179/5000000: episode: 1020, duration: 0.841s, episode steps: 28, steps per second: 33, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.286 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 113568.960938, mean_absolute_error: 2481.486816, mean_q: 2990.984131
   91239/5000000: episode: 1021, duration: 1.802s, episode steps: 60, steps per second: 33, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.450 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 299814.625000, mean_absolute_error: 2299.985352, mean_q: 2759.547119
   91353/5000000: episode: 1022, duration: 3.280s, episode steps: 114, steps per second:

   93032/5000000: episode: 1046, duration: 0.793s, episode steps: 26, steps per second: 33, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 144820.250000, mean_absolute_error: 2065.121338, mean_q: 2502.942383
   93058/5000000: episode: 1047, duration: 0.885s, episode steps: 26, steps per second: 29, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.731 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 136830.750000, mean_absolute_error: 2423.472412, mean_q: 2979.632568
   93300/5000000: episode: 1048, duration: 7.082s, episode steps: 242, steps per second: 34, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.686 [0.000, 5.000], mean observation: 0.259 [0.000, 24.000], loss: 379472.062500, mean_absolute_error: 2286.210205, mean_q: 2750.240723
   93407/5000000: episode: 1049, duration: 3.297s, episode steps: 107, steps per second

   95076/5000000: episode: 1073, duration: 1.006s, episode steps: 36, steps per second: 36, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.556 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 222795.000000, mean_absolute_error: 2130.145996, mean_q: 2630.138428
   95103/5000000: episode: 1074, duration: 0.783s, episode steps: 27, steps per second: 34, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.074 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 368028.218750, mean_absolute_error: 2491.680664, mean_q: 3115.534180
   95132/5000000: episode: 1075, duration: 0.899s, episode steps: 29, steps per second: 32, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 1.276 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 465847.937500, mean_absolute_error: 2259.187256, mean_q: 2753.001953
   95157/5000000: episode: 1076, duration: 0.801s, episode steps: 25, steps per second: 

   96429/5000000: episode: 1100, duration: 2.493s, episode steps: 82, steps per second: 33, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 444394.156250, mean_absolute_error: 2663.861572, mean_q: 3204.453125
   96510/5000000: episode: 1101, duration: 2.160s, episode steps: 81, steps per second: 38, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.815 [0.000, 5.000], mean observation: 0.228 [0.000, 24.000], loss: 464964.687500, mean_absolute_error: 2721.432129, mean_q: 3270.325684
   96535/5000000: episode: 1102, duration: 0.723s, episode steps: 25, steps per second: 35, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 308805.406250, mean_absolute_error: 3092.357910, mean_q: 3715.402832
   96560/5000000: episode: 1103, duration: 0.748s, episode steps: 25, steps per second: 

   98024/5000000: episode: 1127, duration: 6.375s, episode steps: 230, steps per second: 36, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.396 [0.000, 5.000], mean observation: 0.223 [0.000, 24.000], loss: 523733.000000, mean_absolute_error: 2683.113525, mean_q: 3241.847900
   98417/5000000: episode: 1128, duration: 11.064s, episode steps: 393, steps per second: 36, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.476 [0.000, 5.000], mean observation: 0.321 [0.000, 24.000], loss: 2065790.000000, mean_absolute_error: 3391.779053, mean_q: 4015.180664
   98685/5000000: episode: 1129, duration: 8.568s, episode steps: 268, steps per second: 31, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.440 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 715875.750000, mean_absolute_error: 3331.535889, mean_q: 4012.512207
   98711/5000000: episode: 1130, duration: 0.815s, episode steps: 26, steps per sec

   99908/5000000: episode: 1154, duration: 0.908s, episode steps: 25, steps per second: 28, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.680 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 1071125.500000, mean_absolute_error: 4269.571289, mean_q: 4950.579102
   99963/5000000: episode: 1155, duration: 1.748s, episode steps: 55, steps per second: 31, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 3.255 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 478911.812500, mean_absolute_error: 3693.551514, mean_q: 4389.299316
  100088/5000000: episode: 1156, duration: 4.000s, episode steps: 125, steps per second: 31, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.536 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 1096757.750000, mean_absolute_error: 3491.612061, mean_q: 4146.896484
  100114/5000000: episode: 1157, duration: 0.796s, episode steps: 26, steps per secon

  101476/5000000: episode: 1181, duration: 1.879s, episode steps: 61, steps per second: 32, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.984 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 1471274.625000, mean_absolute_error: 4373.376465, mean_q: 5163.435059
  101511/5000000: episode: 1182, duration: 1.067s, episode steps: 35, steps per second: 33, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.371 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 560724.625000, mean_absolute_error: 4046.941406, mean_q: 4793.539551
  101540/5000000: episode: 1183, duration: 0.848s, episode steps: 29, steps per second: 34, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 1.793 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 1884687.875000, mean_absolute_error: 5296.008789, mean_q: 6045.212891
  101567/5000000: episode: 1184, duration: 0.896s, episode steps: 27, steps per second

  103500/5000000: episode: 1208, duration: 0.752s, episode steps: 26, steps per second: 35, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 1140492.125000, mean_absolute_error: 5025.166504, mean_q: 6023.281250
  103526/5000000: episode: 1209, duration: 0.744s, episode steps: 26, steps per second: 35, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 1332427.500000, mean_absolute_error: 5244.372070, mean_q: 6257.870605
  103551/5000000: episode: 1210, duration: 0.791s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.120 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 1385169.625000, mean_absolute_error: 6519.194824, mean_q: 7274.524414
  103576/5000000: episode: 1211, duration: 0.818s, episode steps: 25, steps per secon

  105172/5000000: episode: 1235, duration: 1.130s, episode steps: 37, steps per second: 33, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 1.811 [0.000, 5.000], mean observation: 0.253 [0.000, 24.000], loss: 1827059.000000, mean_absolute_error: 5642.031738, mean_q: 6805.519043
  105256/5000000: episode: 1236, duration: 2.473s, episode steps: 84, steps per second: 34, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.726 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 2326004.750000, mean_absolute_error: 6356.308105, mean_q: 7368.108398
  105308/5000000: episode: 1237, duration: 1.488s, episode steps: 52, steps per second: 35, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.233 [0.000, 24.000], loss: 2033082.125000, mean_absolute_error: 5216.450195, mean_q: 6322.888672
  105402/5000000: episode: 1238, duration: 3.107s, episode steps: 94, steps per secon

  107035/5000000: episode: 1262, duration: 1.938s, episode steps: 59, steps per second: 30, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.475 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 1448662.500000, mean_absolute_error: 6908.904297, mean_q: 8093.356445
  107079/5000000: episode: 1263, duration: 1.406s, episode steps: 44, steps per second: 31, episode reward: -1.000, mean reward: -0.023 [-1.000, 0.000], mean action: 1.727 [0.000, 5.000], mean observation: 0.259 [0.000, 24.000], loss: 44200752.000000, mean_absolute_error: 5532.244141, mean_q: 6587.900391
  107106/5000000: episode: 1264, duration: 0.881s, episode steps: 27, steps per second: 31, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.926 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 28877452.000000, mean_absolute_error: 7292.823730, mean_q: 8667.084961
  107150/5000000: episode: 1265, duration: 1.326s, episode steps: 44, steps per sec

  108673/5000000: episode: 1289, duration: 2.221s, episode steps: 72, steps per second: 32, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.764 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 5937573.500000, mean_absolute_error: 8457.040039, mean_q: 9901.371094
  108767/5000000: episode: 1290, duration: 2.920s, episode steps: 94, steps per second: 32, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.351 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 2749867.250000, mean_absolute_error: 7823.536621, mean_q: 9235.725586
  108796/5000000: episode: 1291, duration: 0.849s, episode steps: 29, steps per second: 34, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.103 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 4502949.000000, mean_absolute_error: 8927.214844, mean_q: 10235.798828
  108847/5000000: episode: 1292, duration: 1.423s, episode steps: 51, steps per seco

  110575/5000000: episode: 1316, duration: 1.503s, episode steps: 52, steps per second: 35, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.962 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 4389836.500000, mean_absolute_error: 9479.642578, mean_q: 11107.368164
  110602/5000000: episode: 1317, duration: 0.788s, episode steps: 27, steps per second: 34, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.370 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 1808247.375000, mean_absolute_error: 8383.584961, mean_q: 9971.874023
  110629/5000000: episode: 1318, duration: 0.867s, episode steps: 27, steps per second: 31, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.222 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 3180641.250000, mean_absolute_error: 12670.645508, mean_q: 14202.416992
  110654/5000000: episode: 1319, duration: 0.810s, episode steps: 25, steps per se

  112237/5000000: episode: 1343, duration: 1.701s, episode steps: 57, steps per second: 34, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.719 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 15326191.000000, mean_absolute_error: 12849.178711, mean_q: 16048.512695
  112292/5000000: episode: 1344, duration: 1.700s, episode steps: 55, steps per second: 32, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.873 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 55956356.000000, mean_absolute_error: 14652.954102, mean_q: 16920.966797
  112345/5000000: episode: 1345, duration: 1.948s, episode steps: 53, steps per second: 27, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.396 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 20962024.000000, mean_absolute_error: 13866.126953, mean_q: 16462.716797
  112405/5000000: episode: 1346, duration: 1.949s, episode steps: 60, steps 

  113665/5000000: episode: 1370, duration: 6.927s, episode steps: 220, steps per second: 32, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.327 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 36985612.000000, mean_absolute_error: 15807.056641, mean_q: 18832.308594
  113690/5000000: episode: 1371, duration: 0.772s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.760 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 13512746.000000, mean_absolute_error: 14288.246094, mean_q: 17567.630859
  113716/5000000: episode: 1372, duration: 0.828s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 7548782.000000, mean_absolute_error: 16844.191406, mean_q: 20319.357422
  113741/5000000: episode: 1373, duration: 0.685s, episode steps: 25, steps 

  115355/5000000: episode: 1397, duration: 0.727s, episode steps: 25, steps per second: 34, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 30295668.000000, mean_absolute_error: 16989.550781, mean_q: 21054.873047
  115563/5000000: episode: 1398, duration: 5.734s, episode steps: 208, steps per second: 36, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.562 [0.000, 5.000], mean observation: 0.235 [0.000, 24.000], loss: 27618172.000000, mean_absolute_error: 21161.158203, mean_q: 24703.564453
  115650/5000000: episode: 1399, duration: 2.518s, episode steps: 87, steps per second: 35, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.943 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 20039238.000000, mean_absolute_error: 17325.123047, mean_q: 21114.417969
  115678/5000000: episode: 1400, duration: 0.925s, episode steps: 28, steps

  117005/5000000: episode: 1424, duration: 1.801s, episode steps: 55, steps per second: 31, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 3.036 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 86649112.000000, mean_absolute_error: 24921.898438, mean_q: 27885.496094
  117069/5000000: episode: 1425, duration: 1.884s, episode steps: 64, steps per second: 34, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.188 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 15539329.000000, mean_absolute_error: 18983.191406, mean_q: 22951.812500
  117094/5000000: episode: 1426, duration: 0.800s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.240 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 46873036.000000, mean_absolute_error: 22791.089844, mean_q: 26221.675781
  117157/5000000: episode: 1427, duration: 1.890s, episode steps: 63, steps 

  118056/5000000: episode: 1451, duration: 1.122s, episode steps: 36, steps per second: 32, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.611 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 105840216.000000, mean_absolute_error: 31182.201172, mean_q: 35618.902344
  118092/5000000: episode: 1452, duration: 1.156s, episode steps: 36, steps per second: 31, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.472 [0.000, 5.000], mean observation: 0.253 [0.000, 24.000], loss: 24278714.000000, mean_absolute_error: 18897.429688, mean_q: 23300.759766
  118119/5000000: episode: 1453, duration: 0.794s, episode steps: 27, steps per second: 34, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 28445508.000000, mean_absolute_error: 25175.134766, mean_q: 29721.666016
  118213/5000000: episode: 1454, duration: 2.896s, episode steps: 94, steps

  119362/5000000: episode: 1478, duration: 1.513s, episode steps: 52, steps per second: 34, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.577 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 83641768.000000, mean_absolute_error: 27656.029297, mean_q: 32154.132812
  119426/5000000: episode: 1479, duration: 2.204s, episode steps: 64, steps per second: 29, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.422 [0.000, 5.000], mean observation: 0.263 [0.000, 24.000], loss: 31589324.000000, mean_absolute_error: 21482.337891, mean_q: 26563.582031
  119494/5000000: episode: 1480, duration: 2.014s, episode steps: 68, steps per second: 34, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.456 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 30562032.000000, mean_absolute_error: 21947.673828, mean_q: 26987.546875
  119526/5000000: episode: 1481, duration: 0.949s, episode steps: 32, steps 

  120978/5000000: episode: 1505, duration: 0.811s, episode steps: 27, steps per second: 33, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.704 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 94997552.000000, mean_absolute_error: 30958.197266, mean_q: 35577.984375
  121004/5000000: episode: 1506, duration: 3.442s, episode steps: 26, steps per second: 8, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.154 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 74676136.000000, mean_absolute_error: 29728.781250, mean_q: 35369.031250
  121030/5000000: episode: 1507, duration: 0.851s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 34967724.000000, mean_absolute_error: 26541.669922, mean_q: 32571.242188
  121061/5000000: episode: 1508, duration: 1.001s, episode steps: 31, steps p

  122442/5000000: episode: 1532, duration: 8.105s, episode steps: 250, steps per second: 31, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.504 [0.000, 5.000], mean observation: 0.297 [0.000, 24.000], loss: 41431120.000000, mean_absolute_error: 27734.576172, mean_q: 33622.925781
  122478/5000000: episode: 1533, duration: 0.988s, episode steps: 36, steps per second: 36, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.250 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 45333660.000000, mean_absolute_error: 26696.525391, mean_q: 33314.566406
  122515/5000000: episode: 1534, duration: 1.098s, episode steps: 37, steps per second: 34, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.189 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 61238916.000000, mean_absolute_error: 27720.210938, mean_q: 33683.445312
  122583/5000000: episode: 1535, duration: 2.172s, episode steps: 68, steps

  124025/5000000: episode: 1559, duration: 0.770s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.240 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 82182632.000000, mean_absolute_error: 24921.363281, mean_q: 30715.101562
  124104/5000000: episode: 1560, duration: 2.229s, episode steps: 79, steps per second: 35, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.443 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 78616192.000000, mean_absolute_error: 26525.003906, mean_q: 32384.337891
  124132/5000000: episode: 1561, duration: 0.859s, episode steps: 28, steps per second: 33, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.750 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 210664800.000000, mean_absolute_error: 29497.896484, mean_q: 34753.480469
  124185/5000000: episode: 1562, duration: 1.568s, episode steps: 53, steps

  125664/5000000: episode: 1586, duration: 0.745s, episode steps: 25, steps per second: 34, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.080 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 78427840.000000, mean_absolute_error: 31051.060547, mean_q: 36468.503906
  125723/5000000: episode: 1587, duration: 1.796s, episode steps: 59, steps per second: 33, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.373 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 33019528.000000, mean_absolute_error: 29450.996094, mean_q: 35856.160156
  125901/5000000: episode: 1588, duration: 5.807s, episode steps: 178, steps per second: 31, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.039 [0.000, 5.000], mean observation: 0.262 [0.000, 24.000], loss: 69779664.000000, mean_absolute_error: 29956.410156, mean_q: 35949.074219
  125974/5000000: episode: 1589, duration: 2.176s, episode steps: 73, steps

  127415/5000000: episode: 1613, duration: 1.011s, episode steps: 35, steps per second: 35, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.829 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 185270368.000000, mean_absolute_error: 30122.767578, mean_q: 36453.085938
  127442/5000000: episode: 1614, duration: 0.812s, episode steps: 27, steps per second: 33, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.444 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 55430428.000000, mean_absolute_error: 29209.189453, mean_q: 34736.230469
  127470/5000000: episode: 1615, duration: 0.848s, episode steps: 28, steps per second: 33, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.143 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 21061490.000000, mean_absolute_error: 26746.509766, mean_q: 33336.425781
  127500/5000000: episode: 1616, duration: 0.819s, episode steps: 30, steps

  128865/5000000: episode: 1640, duration: 7.213s, episode steps: 247, steps per second: 34, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.413 [0.000, 5.000], mean observation: 0.261 [0.000, 24.000], loss: 48618732.000000, mean_absolute_error: 30528.501953, mean_q: 36558.371094
  128974/5000000: episode: 1641, duration: 3.472s, episode steps: 109, steps per second: 31, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.596 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 109386280.000000, mean_absolute_error: 28340.841797, mean_q: 34171.398438
  128999/5000000: episode: 1642, duration: 0.814s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 61598752.000000, mean_absolute_error: 29341.380859, mean_q: 35990.914062
  129025/5000000: episode: 1643, duration: 0.867s, episode steps: 26, ste

  130095/5000000: episode: 1667, duration: 0.895s, episode steps: 30, steps per second: 34, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.267 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 61315048.000000, mean_absolute_error: 33461.800781, mean_q: 38545.949219
  130125/5000000: episode: 1668, duration: 0.917s, episode steps: 30, steps per second: 33, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 90901640.000000, mean_absolute_error: 31390.679688, mean_q: 37892.292969
  130150/5000000: episode: 1669, duration: 0.803s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.280 [1.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 56241788.000000, mean_absolute_error: 34684.742188, mean_q: 40787.984375
  130206/5000000: episode: 1670, duration: 1.804s, episode steps: 56, steps 

  131710/5000000: episode: 1694, duration: 1.018s, episode steps: 33, steps per second: 32, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.576 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 59788956.000000, mean_absolute_error: 31342.935547, mean_q: 38718.679688
  131737/5000000: episode: 1695, duration: 0.760s, episode steps: 27, steps per second: 36, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.704 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 14753013.000000, mean_absolute_error: 29734.791016, mean_q: 36248.117188
  131800/5000000: episode: 1696, duration: 2.106s, episode steps: 63, steps per second: 30, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.444 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 31969448.000000, mean_absolute_error: 28858.742188, mean_q: 35593.062500
  131852/5000000: episode: 1697, duration: 1.518s, episode steps: 52, steps 

  133558/5000000: episode: 1721, duration: 11.635s, episode steps: 412, steps per second: 35, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 2.461 [0.000, 5.000], mean observation: 0.289 [0.000, 24.000], loss: 123207816.000000, mean_absolute_error: 33018.582031, mean_q: 39746.375000
  133584/5000000: episode: 1722, duration: 0.823s, episode steps: 26, steps per second: 32, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 23348022.000000, mean_absolute_error: 34473.941406, mean_q: 40824.664062
  133612/5000000: episode: 1723, duration: 0.824s, episode steps: 28, steps per second: 34, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.786 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 76237176.000000, mean_absolute_error: 31430.792969, mean_q: 38664.179688
  133642/5000000: episode: 1724, duration: 0.912s, episode steps: 30, ste

  134985/5000000: episode: 1748, duration: 2.374s, episode steps: 79, steps per second: 33, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.519 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 106540528.000000, mean_absolute_error: 35825.675781, mean_q: 42286.398438
  135017/5000000: episode: 1749, duration: 1.008s, episode steps: 32, steps per second: 32, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 601634496.000000, mean_absolute_error: 51586.304688, mean_q: 56118.496094
  135057/5000000: episode: 1750, duration: 1.133s, episode steps: 40, steps per second: 35, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.625 [0.000, 5.000], mean observation: 0.256 [0.000, 24.000], loss: 97729656.000000, mean_absolute_error: 27815.187500, mean_q: 33965.183594
  135119/5000000: episode: 1751, duration: 1.925s, episode steps: 62, step

  136923/5000000: episode: 1775, duration: 1.154s, episode steps: 39, steps per second: 34, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 57903608.000000, mean_absolute_error: 32379.240234, mean_q: 39927.417969
  136956/5000000: episode: 1776, duration: 1.041s, episode steps: 33, steps per second: 32, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.152 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 38632548.000000, mean_absolute_error: 37672.523438, mean_q: 45838.457031
  137071/5000000: episode: 1777, duration: 3.796s, episode steps: 115, steps per second: 30, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.417 [0.000, 5.000], mean observation: 0.260 [0.000, 24.000], loss: 131245864.000000, mean_absolute_error: 36613.609375, mean_q: 43818.218750
  137103/5000000: episode: 1778, duration: 0.979s, episode steps: 32, step

  138682/5000000: episode: 1802, duration: 2.548s, episode steps: 84, steps per second: 33, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.155 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 49365296.000000, mean_absolute_error: 36669.601562, mean_q: 44073.773438
  138707/5000000: episode: 1803, duration: 0.778s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.232 [0.000, 24.000], loss: 54217148.000000, mean_absolute_error: 37916.640625, mean_q: 45414.066406
  138796/5000000: episode: 1804, duration: 2.915s, episode steps: 89, steps per second: 31, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.876 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 175351088.000000, mean_absolute_error: 31384.373047, mean_q: 38323.476562
  138821/5000000: episode: 1805, duration: 0.822s, episode steps: 25, steps

  140114/5000000: episode: 1829, duration: 1.580s, episode steps: 49, steps per second: 31, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.184 [0.000, 5.000], mean observation: 0.261 [0.000, 24.000], loss: 34112356.000000, mean_absolute_error: 37612.414062, mean_q: 46266.496094
  140139/5000000: episode: 1830, duration: 0.823s, episode steps: 25, steps per second: 30, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.400 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 70151560.000000, mean_absolute_error: 38225.871094, mean_q: 46719.054688
  140171/5000000: episode: 1831, duration: 0.993s, episode steps: 32, steps per second: 32, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 1.844 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 79265776.000000, mean_absolute_error: 48567.558594, mean_q: 58236.132812
  140205/5000000: episode: 1832, duration: 1.140s, episode steps: 34, steps 

  141688/5000000: episode: 1856, duration: 1.891s, episode steps: 60, steps per second: 32, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.483 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 111663672.000000, mean_absolute_error: 39720.312500, mean_q: 49095.949219
  141713/5000000: episode: 1857, duration: 0.819s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 1.920 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 265381728.000000, mean_absolute_error: 41091.386719, mean_q: 50350.539062
  141825/5000000: episode: 1858, duration: 3.251s, episode steps: 112, steps per second: 34, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.312 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 79266384.000000, mean_absolute_error: 49991.371094, mean_q: 58693.277344
  141918/5000000: episode: 1859, duration: 2.782s, episode steps: 93, ste

  143302/5000000: episode: 1883, duration: 1.256s, episode steps: 39, steps per second: 31, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.257 [0.000, 24.000], loss: 90352472.000000, mean_absolute_error: 46866.742188, mean_q: 53426.964844
  143337/5000000: episode: 1884, duration: 1.174s, episode steps: 35, steps per second: 30, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.886 [0.000, 5.000], mean observation: 0.253 [0.000, 24.000], loss: 108786616.000000, mean_absolute_error: 37656.878906, mean_q: 46350.156250
  143389/5000000: episode: 1885, duration: 1.743s, episode steps: 52, steps per second: 30, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.904 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 93169224.000000, mean_absolute_error: 41293.230469, mean_q: 48765.519531
  143417/5000000: episode: 1886, duration: 0.856s, episode steps: 28, steps

  145242/5000000: episode: 1910, duration: 0.851s, episode steps: 27, steps per second: 32, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 3.111 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 21320792.000000, mean_absolute_error: 32658.400391, mean_q: 40626.902344
  145365/5000000: episode: 1911, duration: 4.039s, episode steps: 123, steps per second: 30, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.561 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 32215872.000000, mean_absolute_error: 36795.734375, mean_q: 44994.289062
  145390/5000000: episode: 1912, duration: 0.699s, episode steps: 25, steps per second: 36, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.960 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 46204496.000000, mean_absolute_error: 43976.953125, mean_q: 51105.078125
  145415/5000000: episode: 1913, duration: 0.731s, episode steps: 25, steps

  146733/5000000: episode: 1937, duration: 0.669s, episode steps: 25, steps per second: 37, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.120 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 214134656.000000, mean_absolute_error: 46874.675781, mean_q: 53911.484375
  146981/5000000: episode: 1938, duration: 8.319s, episode steps: 248, steps per second: 30, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.504 [0.000, 5.000], mean observation: 0.276 [0.000, 24.000], loss: 66155512.000000, mean_absolute_error: 38577.468750, mean_q: 46582.128906
  147048/5000000: episode: 1939, duration: 2.088s, episode steps: 67, steps per second: 32, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.358 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 109474288.000000, mean_absolute_error: 43846.691406, mean_q: 51646.070312
  147073/5000000: episode: 1940, duration: 0.836s, episode steps: 25, ste

  148399/5000000: episode: 1964, duration: 0.824s, episode steps: 26, steps per second: 32, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 1.731 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 37238344.000000, mean_absolute_error: 30748.263672, mean_q: 38175.789062
  148571/5000000: episode: 1965, duration: 4.824s, episode steps: 172, steps per second: 36, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.430 [0.000, 5.000], mean observation: 0.278 [0.000, 24.000], loss: 104538336.000000, mean_absolute_error: 41623.277344, mean_q: 49986.582031
  148597/5000000: episode: 1966, duration: 0.771s, episode steps: 26, steps per second: 34, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.231 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 69826504.000000, mean_absolute_error: 44964.921875, mean_q: 52521.437500
  148683/5000000: episode: 1967, duration: 2.626s, episode steps: 86, step

  150057/5000000: episode: 1991, duration: 19.222s, episode steps: 66, steps per second: 3, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.682 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 109693096.000000, mean_absolute_error: 38288.285156, mean_q: 46976.777344
  150083/5000000: episode: 1992, duration: 0.813s, episode steps: 26, steps per second: 32, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.808 [0.000, 5.000], mean observation: 0.234 [0.000, 24.000], loss: 107685128.000000, mean_absolute_error: 52909.019531, mean_q: 59887.554688
  150111/5000000: episode: 1993, duration: 0.879s, episode steps: 28, steps per second: 32, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.214 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 86673872.000000, mean_absolute_error: 41541.144531, mean_q: 49273.097656
  150235/5000000: episode: 1994, duration: 3.972s, episode steps: 124, ste

  151835/5000000: episode: 2018, duration: 1.723s, episode steps: 54, steps per second: 31, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.556 [0.000, 5.000], mean observation: 0.264 [0.000, 24.000], loss: 102729200.000000, mean_absolute_error: 32692.914062, mean_q: 39200.679688
  151890/5000000: episode: 2019, duration: 1.766s, episode steps: 55, steps per second: 31, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.400 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 96532488.000000, mean_absolute_error: 34029.753906, mean_q: 41363.093750
  151925/5000000: episode: 2020, duration: 1.178s, episode steps: 35, steps per second: 30, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.200 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 54576972.000000, mean_absolute_error: 35102.097656, mean_q: 42468.871094
  152076/5000000: episode: 2021, duration: 4.795s, episode steps: 151, step

  153691/5000000: episode: 2045, duration: 0.717s, episode steps: 29, steps per second: 40, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.448 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 87514424.000000, mean_absolute_error: 35796.019531, mean_q: 43317.964844
  153719/5000000: episode: 2046, duration: 0.855s, episode steps: 28, steps per second: 33, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.643 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 497383008.000000, mean_absolute_error: 44572.601562, mean_q: 51579.699219
  153746/5000000: episode: 2047, duration: 0.854s, episode steps: 27, steps per second: 32, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.704 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 92164128.000000, mean_absolute_error: 40481.652344, mean_q: 46863.765625
  153772/5000000: episode: 2048, duration: 0.861s, episode steps: 26, steps

  154922/5000000: episode: 2072, duration: 2.502s, episode steps: 83, steps per second: 33, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.422 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 78945808.000000, mean_absolute_error: 32883.980469, mean_q: 39724.492188
  155043/5000000: episode: 2073, duration: 4.024s, episode steps: 121, steps per second: 30, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.413 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 130628584.000000, mean_absolute_error: 39622.511719, mean_q: 47248.796875
  155109/5000000: episode: 2074, duration: 1.875s, episode steps: 66, steps per second: 35, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.455 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 43299988.000000, mean_absolute_error: 38114.972656, mean_q: 46150.195312
  155179/5000000: episode: 2075, duration: 1.973s, episode steps: 70, step

  156760/5000000: episode: 2099, duration: 1.598s, episode steps: 52, steps per second: 33, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.261 [0.000, 24.000], loss: 159865984.000000, mean_absolute_error: 46068.632812, mean_q: 53503.691406
  156785/5000000: episode: 2100, duration: 0.821s, episode steps: 25, steps per second: 30, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.440 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 236818816.000000, mean_absolute_error: 36848.378906, mean_q: 44277.449219
  156814/5000000: episode: 2101, duration: 0.901s, episode steps: 29, steps per second: 32, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.379 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 45456992.000000, mean_absolute_error: 35518.875000, mean_q: 43505.617188
  156940/5000000: episode: 2102, duration: 3.465s, episode steps: 126, ste

  158162/5000000: episode: 2126, duration: 0.931s, episode steps: 28, steps per second: 30, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.643 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 34285952.000000, mean_absolute_error: 33781.082031, mean_q: 41120.316406
  158230/5000000: episode: 2127, duration: 2.117s, episode steps: 68, steps per second: 32, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.838 [0.000, 5.000], mean observation: 0.259 [0.000, 24.000], loss: 167332304.000000, mean_absolute_error: 40302.589844, mean_q: 47747.953125
  158260/5000000: episode: 2128, duration: 0.872s, episode steps: 30, steps per second: 34, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.233 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 65818780.000000, mean_absolute_error: 39152.312500, mean_q: 44994.574219
  158321/5000000: episode: 2129, duration: 1.786s, episode steps: 61, steps

  159679/5000000: episode: 2153, duration: 1.041s, episode steps: 33, steps per second: 32, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.182 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 30609466.000000, mean_absolute_error: 37677.968750, mean_q: 46304.187500
  159734/5000000: episode: 2154, duration: 1.608s, episode steps: 55, steps per second: 34, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.618 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 20757744.000000, mean_absolute_error: 31143.119141, mean_q: 37952.640625
  159873/5000000: episode: 2155, duration: 4.412s, episode steps: 139, steps per second: 32, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.302 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 49578176.000000, mean_absolute_error: 38881.136719, mean_q: 46341.250000
  159934/5000000: episode: 2156, duration: 1.943s, episode steps: 61, steps

  161535/5000000: episode: 2180, duration: 2.614s, episode steps: 88, steps per second: 34, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.545 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 37503344.000000, mean_absolute_error: 33684.808594, mean_q: 40889.472656
  161568/5000000: episode: 2181, duration: 1.043s, episode steps: 33, steps per second: 32, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.455 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 22083462.000000, mean_absolute_error: 34474.515625, mean_q: 41502.500000
  161601/5000000: episode: 2182, duration: 1.041s, episode steps: 33, steps per second: 32, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 1.758 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 57769216.000000, mean_absolute_error: 43482.656250, mean_q: 51681.382812
  161692/5000000: episode: 2183, duration: 2.602s, episode steps: 91, steps 

  163388/5000000: episode: 2207, duration: 0.908s, episode steps: 27, steps per second: 30, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.259 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 62276080.000000, mean_absolute_error: 35604.003906, mean_q: 43118.972656
  163450/5000000: episode: 2208, duration: 1.950s, episode steps: 62, steps per second: 32, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.468 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 446811136.000000, mean_absolute_error: 41103.636719, mean_q: 47544.519531
  163481/5000000: episode: 2209, duration: 0.882s, episode steps: 31, steps per second: 35, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.484 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 178724160.000000, mean_absolute_error: 38684.011719, mean_q: 46348.183594
  163509/5000000: episode: 2210, duration: 0.929s, episode steps: 28, step

  164744/5000000: episode: 2234, duration: 1.036s, episode steps: 31, steps per second: 30, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 3.032 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 12053685.000000, mean_absolute_error: 31118.292969, mean_q: 38206.296875
  164781/5000000: episode: 2235, duration: 1.076s, episode steps: 37, steps per second: 34, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.162 [0.000, 5.000], mean observation: 0.253 [0.000, 24.000], loss: 43330464.000000, mean_absolute_error: 39491.167969, mean_q: 47122.035156
  164925/5000000: episode: 2236, duration: 4.773s, episode steps: 144, steps per second: 30, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.312 [0.000, 5.000], mean observation: 0.273 [0.000, 24.000], loss: 162543824.000000, mean_absolute_error: 38985.832031, mean_q: 46414.445312
  164979/5000000: episode: 2237, duration: 1.757s, episode steps: 54, step

  166471/5000000: episode: 2261, duration: 1.505s, episode steps: 55, steps per second: 37, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.582 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 36821876.000000, mean_absolute_error: 35259.113281, mean_q: 42280.386719
  166530/5000000: episode: 2262, duration: 1.688s, episode steps: 59, steps per second: 35, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.814 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 17034528.000000, mean_absolute_error: 31440.992188, mean_q: 37670.582031
  166570/5000000: episode: 2263, duration: 1.360s, episode steps: 40, steps per second: 29, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.150 [0.000, 5.000], mean observation: 0.257 [0.000, 24.000], loss: 19902846.000000, mean_absolute_error: 32152.656250, mean_q: 39603.781250
  166595/5000000: episode: 2264, duration: 0.737s, episode steps: 25, steps 

  167912/5000000: episode: 2288, duration: 1.904s, episode steps: 59, steps per second: 31, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.339 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 377122176.000000, mean_absolute_error: 30310.351562, mean_q: 38158.304688
  167940/5000000: episode: 2289, duration: 0.781s, episode steps: 28, steps per second: 36, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 320623968.000000, mean_absolute_error: 36698.546875, mean_q: 43491.187500
  167978/5000000: episode: 2290, duration: 1.066s, episode steps: 38, steps per second: 36, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.447 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 55806496.000000, mean_absolute_error: 31622.572266, mean_q: 38541.984375
  168033/5000000: episode: 2291, duration: 1.512s, episode steps: 55, step

  169437/5000000: episode: 2315, duration: 0.964s, episode steps: 33, steps per second: 34, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.212 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 71388128.000000, mean_absolute_error: 36720.738281, mean_q: 41825.976562
  169524/5000000: episode: 2316, duration: 2.776s, episode steps: 87, steps per second: 31, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.644 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 235905536.000000, mean_absolute_error: 33864.148438, mean_q: 40632.832031
  169623/5000000: episode: 2317, duration: 2.753s, episode steps: 99, steps per second: 36, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.475 [0.000, 5.000], mean observation: 0.212 [0.000, 24.000], loss: 55832536.000000, mean_absolute_error: 32501.822266, mean_q: 38593.156250
  169651/5000000: episode: 2318, duration: 0.813s, episode steps: 28, steps

  171085/5000000: episode: 2342, duration: 0.979s, episode steps: 31, steps per second: 32, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.516 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 399431552.000000, mean_absolute_error: 31985.121094, mean_q: 36705.074219
  171110/5000000: episode: 2343, duration: 0.698s, episode steps: 25, steps per second: 36, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.920 [0.000, 5.000], mean observation: 0.230 [0.000, 24.000], loss: 236060080.000000, mean_absolute_error: 31764.640625, mean_q: 37046.550781
  171172/5000000: episode: 2344, duration: 1.726s, episode steps: 62, steps per second: 36, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.919 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 42854356.000000, mean_absolute_error: 32385.304688, mean_q: 37462.636719
  171202/5000000: episode: 2345, duration: 0.899s, episode steps: 30, step

  172437/5000000: episode: 2369, duration: 4.459s, episode steps: 134, steps per second: 30, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.522 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 65983856.000000, mean_absolute_error: 30346.246094, mean_q: 36768.960938
  172471/5000000: episode: 2370, duration: 1.061s, episode steps: 34, steps per second: 32, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.676 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 36547404.000000, mean_absolute_error: 28881.179688, mean_q: 35361.066406
  172523/5000000: episode: 2371, duration: 1.678s, episode steps: 52, steps per second: 31, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.423 [0.000, 5.000], mean observation: 0.260 [0.000, 24.000], loss: 66752440.000000, mean_absolute_error: 27983.197266, mean_q: 33810.167969
  172658/5000000: episode: 2372, duration: 3.924s, episode steps: 135, step

  174637/5000000: episode: 2396, duration: 5.759s, episode steps: 191, steps per second: 33, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.707 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 88194616.000000, mean_absolute_error: 37817.964844, mean_q: 45142.660156
  174667/5000000: episode: 2397, duration: 0.861s, episode steps: 30, steps per second: 35, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 95737552.000000, mean_absolute_error: 34756.765625, mean_q: 43278.851562
  174700/5000000: episode: 2398, duration: 0.992s, episode steps: 33, steps per second: 33, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.727 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 48957608.000000, mean_absolute_error: 30607.613281, mean_q: 37411.289062
  174728/5000000: episode: 2399, duration: 0.813s, episode steps: 28, steps

  176252/5000000: episode: 2423, duration: 6.447s, episode steps: 214, steps per second: 33, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.682 [0.000, 5.000], mean observation: 0.331 [0.000, 24.000], loss: 31849384.000000, mean_absolute_error: 31776.771484, mean_q: 38124.902344
  176309/5000000: episode: 2424, duration: 1.576s, episode steps: 57, steps per second: 36, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.649 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 53306588.000000, mean_absolute_error: 32875.558594, mean_q: 39546.722656
  176376/5000000: episode: 2425, duration: 1.936s, episode steps: 67, steps per second: 35, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.030 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 41160160.000000, mean_absolute_error: 31531.275391, mean_q: 37883.238281
  176403/5000000: episode: 2426, duration: 0.902s, episode steps: 27, steps

  178003/5000000: episode: 2450, duration: 2.249s, episode steps: 70, steps per second: 31, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.457 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 252830608.000000, mean_absolute_error: 33246.648438, mean_q: 39846.507812
  178028/5000000: episode: 2451, duration: 0.765s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.160 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 46556784.000000, mean_absolute_error: 29724.039062, mean_q: 35731.511719
  178149/5000000: episode: 2452, duration: 3.804s, episode steps: 121, steps per second: 32, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.562 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 59824300.000000, mean_absolute_error: 32750.576172, mean_q: 39226.808594
  178177/5000000: episode: 2453, duration: 0.868s, episode steps: 28, step

  179469/5000000: episode: 2477, duration: 0.794s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.320 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 46099360.000000, mean_absolute_error: 37250.457031, mean_q: 43863.363281
  179497/5000000: episode: 2478, duration: 0.874s, episode steps: 28, steps per second: 32, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.821 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 204298464.000000, mean_absolute_error: 30854.310547, mean_q: 36200.625000
  179522/5000000: episode: 2479, duration: 0.667s, episode steps: 25, steps per second: 37, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.840 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 120444600.000000, mean_absolute_error: 36928.949219, mean_q: 43778.339844
  179583/5000000: episode: 2480, duration: 1.855s, episode steps: 61, step

  180729/5000000: episode: 2504, duration: 0.902s, episode steps: 30, steps per second: 33, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.633 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 178030480.000000, mean_absolute_error: 37236.476562, mean_q: 42981.097656
  180784/5000000: episode: 2505, duration: 1.711s, episode steps: 55, steps per second: 32, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.255 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 66966504.000000, mean_absolute_error: 28658.865234, mean_q: 34593.789062
  180810/5000000: episode: 2506, duration: 0.751s, episode steps: 26, steps per second: 35, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 43596032.000000, mean_absolute_error: 33622.632812, mean_q: 42636.910156
  180835/5000000: episode: 2507, duration: 0.697s, episode steps: 25, steps

  182184/5000000: episode: 2531, duration: 1.558s, episode steps: 54, steps per second: 35, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 62506092.000000, mean_absolute_error: 36223.796875, mean_q: 42829.929688
  182210/5000000: episode: 2532, duration: 0.773s, episode steps: 26, steps per second: 34, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.577 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 80491400.000000, mean_absolute_error: 32517.009766, mean_q: 40092.789062
  182235/5000000: episode: 2533, duration: 0.775s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.880 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 19051224.000000, mean_absolute_error: 27300.472656, mean_q: 34022.906250
  182289/5000000: episode: 2534, duration: 1.683s, episode steps: 54, steps 

  183531/5000000: episode: 2558, duration: 0.811s, episode steps: 26, steps per second: 32, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.423 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 26792812.000000, mean_absolute_error: 32056.746094, mean_q: 38685.777344
  183613/5000000: episode: 2559, duration: 2.365s, episode steps: 82, steps per second: 35, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.549 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 108553240.000000, mean_absolute_error: 37559.625000, mean_q: 44678.464844
  183692/5000000: episode: 2560, duration: 2.392s, episode steps: 79, steps per second: 33, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.519 [0.000, 5.000], mean observation: 0.261 [0.000, 24.000], loss: 221314320.000000, mean_absolute_error: 30808.937500, mean_q: 37344.269531
  183718/5000000: episode: 2561, duration: 0.817s, episode steps: 26, step

  185007/5000000: episode: 2585, duration: 1.617s, episode steps: 46, steps per second: 28, episode reward: -1.000, mean reward: -0.022 [-1.000, 0.000], mean action: 2.696 [0.000, 5.000], mean observation: 0.261 [0.000, 24.000], loss: 93368464.000000, mean_absolute_error: 39313.031250, mean_q: 46728.406250
  185043/5000000: episode: 2586, duration: 1.144s, episode steps: 36, steps per second: 31, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.250 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 43180580.000000, mean_absolute_error: 32090.232422, mean_q: 39184.847656
  185206/5000000: episode: 2587, duration: 4.430s, episode steps: 163, steps per second: 37, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.613 [0.000, 5.000], mean observation: 0.303 [0.000, 24.000], loss: 67101192.000000, mean_absolute_error: 31191.882812, mean_q: 37376.171875
  185303/5000000: episode: 2588, duration: 2.842s, episode steps: 97, steps

  186587/5000000: episode: 2612, duration: 1.671s, episode steps: 53, steps per second: 32, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.208 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 46094344.000000, mean_absolute_error: 32363.042969, mean_q: 38590.269531
  186625/5000000: episode: 2613, duration: 1.225s, episode steps: 38, steps per second: 31, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.184 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 121000808.000000, mean_absolute_error: 35227.171875, mean_q: 42398.300781
  186650/5000000: episode: 2614, duration: 0.718s, episode steps: 25, steps per second: 35, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.440 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 61293636.000000, mean_absolute_error: 33371.500000, mean_q: 38700.191406
  186677/5000000: episode: 2615, duration: 0.757s, episode steps: 27, steps

  187746/5000000: episode: 2639, duration: 0.787s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.200 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 27569536.000000, mean_absolute_error: 22493.134766, mean_q: 26424.712891
  187774/5000000: episode: 2640, duration: 0.834s, episode steps: 28, steps per second: 34, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.464 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 53675044.000000, mean_absolute_error: 29522.021484, mean_q: 34225.449219
  187839/5000000: episode: 2641, duration: 2.075s, episode steps: 65, steps per second: 31, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.554 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 80558584.000000, mean_absolute_error: 31310.847656, mean_q: 36992.269531
  187866/5000000: episode: 2642, duration: 0.832s, episode steps: 27, steps 

  188769/5000000: episode: 2666, duration: 0.785s, episode steps: 26, steps per second: 33, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.231 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 89592184.000000, mean_absolute_error: 31159.865234, mean_q: 37622.976562
  188807/5000000: episode: 2667, duration: 1.118s, episode steps: 38, steps per second: 34, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.553 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 95690576.000000, mean_absolute_error: 34239.269531, mean_q: 38958.070312
  188843/5000000: episode: 2668, duration: 1.162s, episode steps: 36, steps per second: 31, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.722 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 46165080.000000, mean_absolute_error: 30550.861328, mean_q: 36009.847656
  188871/5000000: episode: 2669, duration: 0.914s, episode steps: 28, steps 

  190438/5000000: episode: 2693, duration: 3.595s, episode steps: 124, steps per second: 34, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.315 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 176848736.000000, mean_absolute_error: 34038.085938, mean_q: 38668.453125
  190492/5000000: episode: 2694, duration: 1.733s, episode steps: 54, steps per second: 31, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.796 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 58045728.000000, mean_absolute_error: 33568.988281, mean_q: 38530.113281
  190527/5000000: episode: 2695, duration: 1.024s, episode steps: 35, steps per second: 34, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.371 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 46390936.000000, mean_absolute_error: 28432.324219, mean_q: 32991.167969
  190585/5000000: episode: 2696, duration: 1.878s, episode steps: 58, step

  192263/5000000: episode: 2720, duration: 0.842s, episode steps: 30, steps per second: 36, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 1.933 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 103116344.000000, mean_absolute_error: 31562.500000, mean_q: 37906.667969
  192322/5000000: episode: 2721, duration: 1.815s, episode steps: 59, steps per second: 33, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.508 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 47487216.000000, mean_absolute_error: 30743.685547, mean_q: 35302.804688
  192411/5000000: episode: 2722, duration: 2.842s, episode steps: 89, steps per second: 31, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.494 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 57031708.000000, mean_absolute_error: 33903.402344, mean_q: 38511.140625
  192438/5000000: episode: 2723, duration: 0.885s, episode steps: 27, steps

  194212/5000000: episode: 2747, duration: 3.675s, episode steps: 119, steps per second: 32, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.714 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 51761972.000000, mean_absolute_error: 28766.578125, mean_q: 33822.644531
  194239/5000000: episode: 2748, duration: 0.885s, episode steps: 27, steps per second: 31, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 59457952.000000, mean_absolute_error: 32161.414062, mean_q: 38099.085938
  194272/5000000: episode: 2749, duration: 1.032s, episode steps: 33, steps per second: 32, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 19779352.000000, mean_absolute_error: 30911.837891, mean_q: 35975.386719
  194354/5000000: episode: 2750, duration: 2.348s, episode steps: 82, steps

  195905/5000000: episode: 2774, duration: 1.746s, episode steps: 55, steps per second: 31, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.509 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 201551616.000000, mean_absolute_error: 33378.414062, mean_q: 37939.144531
  195934/5000000: episode: 2775, duration: 0.956s, episode steps: 29, steps per second: 30, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.552 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 57831568.000000, mean_absolute_error: 25885.210938, mean_q: 31837.375000
  195960/5000000: episode: 2776, duration: 0.878s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.885 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 150318480.000000, mean_absolute_error: 31644.216797, mean_q: 36509.707031
  195985/5000000: episode: 2777, duration: 0.723s, episode steps: 25, step

  197360/5000000: episode: 2801, duration: 0.762s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.120 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 181418112.000000, mean_absolute_error: 39500.308594, mean_q: 44149.515625
  197386/5000000: episode: 2802, duration: 0.800s, episode steps: 26, steps per second: 33, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.233 [0.000, 24.000], loss: 44666240.000000, mean_absolute_error: 33310.058594, mean_q: 38412.847656
  197514/5000000: episode: 2803, duration: 3.659s, episode steps: 128, steps per second: 35, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.664 [0.000, 5.000], mean observation: 0.271 [0.000, 24.000], loss: 256615968.000000, mean_absolute_error: 33774.171875, mean_q: 38282.246094
  197589/5000000: episode: 2804, duration: 2.095s, episode steps: 75, ste

  198913/5000000: episode: 2828, duration: 0.935s, episode steps: 31, steps per second: 33, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.323 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 13756408.000000, mean_absolute_error: 28788.250000, mean_q: 34071.542969
  198939/5000000: episode: 2829, duration: 0.705s, episode steps: 26, steps per second: 37, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 45970628.000000, mean_absolute_error: 27161.451172, mean_q: 32210.957031
  198967/5000000: episode: 2830, duration: 0.900s, episode steps: 28, steps per second: 31, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 117726912.000000, mean_absolute_error: 37883.539062, mean_q: 42381.351562
  198995/5000000: episode: 2831, duration: 0.942s, episode steps: 28, steps

  200584/5000000: episode: 2855, duration: 0.770s, episode steps: 27, steps per second: 35, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.741 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 176602112.000000, mean_absolute_error: 40262.496094, mean_q: 45113.070312
  200639/5000000: episode: 2856, duration: 1.590s, episode steps: 55, steps per second: 35, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.491 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 58448328.000000, mean_absolute_error: 25147.533203, mean_q: 30012.748047
  200673/5000000: episode: 2857, duration: 0.993s, episode steps: 34, steps per second: 34, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.118 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 99281024.000000, mean_absolute_error: 32009.550781, mean_q: 37247.902344
  200704/5000000: episode: 2858, duration: 0.892s, episode steps: 31, steps

  201905/5000000: episode: 2882, duration: 1.221s, episode steps: 37, steps per second: 30, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.568 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 1257291008.000000, mean_absolute_error: 52103.871094, mean_q: 53563.066406
  202049/5000000: episode: 2883, duration: 4.801s, episode steps: 144, steps per second: 30, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.361 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 114885416.000000, mean_absolute_error: 30835.867188, mean_q: 35830.031250
  202075/5000000: episode: 2884, duration: 0.752s, episode steps: 26, steps per second: 35, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.846 [0.000, 5.000], mean observation: 0.232 [0.000, 24.000], loss: 65660156.000000, mean_absolute_error: 21785.152344, mean_q: 26901.853516
  202157/5000000: episode: 2885, duration: 2.576s, episode steps: 82, st

  203359/5000000: episode: 2909, duration: 0.773s, episode steps: 26, steps per second: 34, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 1.962 [0.000, 5.000], mean observation: 0.233 [0.000, 24.000], loss: 258399808.000000, mean_absolute_error: 37242.390625, mean_q: 40821.609375
  203412/5000000: episode: 2910, duration: 1.514s, episode steps: 53, steps per second: 35, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.170 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 152074832.000000, mean_absolute_error: 51431.097656, mean_q: 54868.050781
  203442/5000000: episode: 2911, duration: 0.925s, episode steps: 30, steps per second: 32, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.100 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 259961168.000000, mean_absolute_error: 43659.332031, mean_q: 48929.769531
  203468/5000000: episode: 2912, duration: 0.808s, episode steps: 26, ste

  204947/5000000: episode: 2936, duration: 1.161s, episode steps: 39, steps per second: 34, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.513 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 320106528.000000, mean_absolute_error: 46553.011719, mean_q: 50849.125000
  204977/5000000: episode: 2937, duration: 0.805s, episode steps: 30, steps per second: 37, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.833 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 463369152.000000, mean_absolute_error: 45586.683594, mean_q: 49225.394531
  205003/5000000: episode: 2938, duration: 0.903s, episode steps: 26, steps per second: 29, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.346 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 79118472.000000, mean_absolute_error: 44303.304688, mean_q: 48616.500000
  205028/5000000: episode: 2939, duration: 0.716s, episode steps: 25, step

  206870/5000000: episode: 2963, duration: 0.855s, episode steps: 25, steps per second: 29, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.200 [1.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 121869720.000000, mean_absolute_error: 43115.261719, mean_q: 48703.144531
  206898/5000000: episode: 2964, duration: 0.951s, episode steps: 28, steps per second: 29, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.357 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 53132736.000000, mean_absolute_error: 38889.675781, mean_q: 43749.769531
  206931/5000000: episode: 2965, duration: 0.915s, episode steps: 33, steps per second: 36, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.303 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 285970240.000000, mean_absolute_error: 71523.875000, mean_q: 70236.906250
  207026/5000000: episode: 2966, duration: 3.150s, episode steps: 95, step

  208510/5000000: episode: 2990, duration: 0.832s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.923 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 191174592.000000, mean_absolute_error: 36540.886719, mean_q: 41734.660156
  208535/5000000: episode: 2991, duration: 0.783s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.520 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 19081592.000000, mean_absolute_error: 35601.785156, mean_q: 41245.035156
  208562/5000000: episode: 2992, duration: 0.838s, episode steps: 27, steps per second: 32, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.370 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 265869040.000000, mean_absolute_error: 42654.273438, mean_q: 46416.476562
  208608/5000000: episode: 2993, duration: 1.401s, episode steps: 46, step

  209795/5000000: episode: 3017, duration: 0.824s, episode steps: 25, steps per second: 30, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.400 [0.000, 5.000], mean observation: 0.231 [0.000, 24.000], loss: 150852880.000000, mean_absolute_error: 39704.093750, mean_q: 45649.828125
  209824/5000000: episode: 3018, duration: 0.873s, episode steps: 29, steps per second: 33, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.483 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 80193104.000000, mean_absolute_error: 38339.859375, mean_q: 42655.179688
  209886/5000000: episode: 3019, duration: 1.884s, episode steps: 62, steps per second: 33, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.532 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 151456640.000000, mean_absolute_error: 36687.316406, mean_q: 41678.136719
  209916/5000000: episode: 3020, duration: 0.932s, episode steps: 30, step

  211161/5000000: episode: 3044, duration: 1.764s, episode steps: 61, steps per second: 35, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.508 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 182818832.000000, mean_absolute_error: 44417.429688, mean_q: 50716.167969
  211186/5000000: episode: 3045, duration: 0.793s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.280 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 203220128.000000, mean_absolute_error: 40750.894531, mean_q: 47300.246094
  211212/5000000: episode: 3046, duration: 0.807s, episode steps: 26, steps per second: 32, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.269 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 248898032.000000, mean_absolute_error: 51433.984375, mean_q: 56469.812500
  211322/5000000: episode: 3047, duration: 2.637s, episode steps: 110, st

  213305/5000000: episode: 3071, duration: 1.187s, episode steps: 36, steps per second: 30, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 146198544.000000, mean_absolute_error: 47382.855469, mean_q: 50452.250000
  213391/5000000: episode: 3072, duration: 2.570s, episode steps: 86, steps per second: 33, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.535 [0.000, 5.000], mean observation: 0.264 [0.000, 24.000], loss: 43882180.000000, mean_absolute_error: 34119.738281, mean_q: 39826.335938
  213424/5000000: episode: 3073, duration: 1.022s, episode steps: 33, steps per second: 32, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.485 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 178938752.000000, mean_absolute_error: 58661.707031, mean_q: 63263.449219
  213451/5000000: episode: 3074, duration: 0.890s, episode steps: 27, step

  214890/5000000: episode: 3098, duration: 1.022s, episode steps: 40, steps per second: 39, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.300 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 186682144.000000, mean_absolute_error: 51619.675781, mean_q: 56464.355469
  214920/5000000: episode: 3099, duration: 0.949s, episode steps: 30, steps per second: 32, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 88595632.000000, mean_absolute_error: 46512.726562, mean_q: 51293.085938
  214945/5000000: episode: 3100, duration: 0.794s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 323934176.000000, mean_absolute_error: 67089.054688, mean_q: 74392.359375
  215186/5000000: episode: 3101, duration: 7.668s, episode steps: 241, ste

  216418/5000000: episode: 3125, duration: 0.872s, episode steps: 25, steps per second: 29, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.200 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 189910368.000000, mean_absolute_error: 51045.441406, mean_q: 58067.531250
  216445/5000000: episode: 3126, duration: 0.815s, episode steps: 27, steps per second: 33, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.519 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 155634752.000000, mean_absolute_error: 49173.867188, mean_q: 56166.058594
  216586/5000000: episode: 3127, duration: 4.053s, episode steps: 141, steps per second: 35, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.596 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 362354144.000000, mean_absolute_error: 55137.214844, mean_q: 61357.007812
  216644/5000000: episode: 3128, duration: 1.718s, episode steps: 58, st

  217938/5000000: episode: 3152, duration: 1.020s, episode steps: 34, steps per second: 33, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.059 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 156632432.000000, mean_absolute_error: 44183.691406, mean_q: 52271.152344
  217969/5000000: episode: 3153, duration: 0.959s, episode steps: 31, steps per second: 32, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.613 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 187284720.000000, mean_absolute_error: 57650.167969, mean_q: 65016.429688
  218205/5000000: episode: 3154, duration: 7.488s, episode steps: 236, steps per second: 32, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.513 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 332281952.000000, mean_absolute_error: 48735.699219, mean_q: 56611.289062
  218260/5000000: episode: 3155, duration: 1.658s, episode steps: 55, st

  219374/5000000: episode: 3179, duration: 2.299s, episode steps: 77, steps per second: 33, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.675 [0.000, 5.000], mean observation: 0.271 [0.000, 24.000], loss: 1669773056.000000, mean_absolute_error: 48285.492188, mean_q: 53700.886719
  219406/5000000: episode: 3180, duration: 1.068s, episode steps: 32, steps per second: 30, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.875 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 330344928.000000, mean_absolute_error: 45695.437500, mean_q: 54889.648438
  219481/5000000: episode: 3181, duration: 2.253s, episode steps: 75, steps per second: 33, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.693 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 242828960.000000, mean_absolute_error: 42911.343750, mean_q: 50212.574219
  219507/5000000: episode: 3182, duration: 0.832s, episode steps: 26, st

  220738/5000000: episode: 3206, duration: 1.945s, episode steps: 66, steps per second: 34, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.773 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 277802368.000000, mean_absolute_error: 48509.535156, mean_q: 53417.558594
  220763/5000000: episode: 3207, duration: 0.814s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.840 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 237374688.000000, mean_absolute_error: 69141.843750, mean_q: 73175.671875
  220831/5000000: episode: 3208, duration: 1.806s, episode steps: 68, steps per second: 38, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.691 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 354672928.000000, mean_absolute_error: 57159.132812, mean_q: 63756.742188
  220890/5000000: episode: 3209, duration: 1.872s, episode steps: 59, ste

  222657/5000000: episode: 3233, duration: 0.773s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 90591072.000000, mean_absolute_error: 62187.441406, mean_q: 68467.828125
  222686/5000000: episode: 3234, duration: 0.962s, episode steps: 29, steps per second: 30, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.379 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 31570428.000000, mean_absolute_error: 42241.523438, mean_q: 51971.570312
  222770/5000000: episode: 3235, duration: 2.565s, episode steps: 84, steps per second: 33, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.583 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 124532872.000000, mean_absolute_error: 48234.562500, mean_q: 54841.968750
  222795/5000000: episode: 3236, duration: 0.805s, episode steps: 25, steps

  224309/5000000: episode: 3260, duration: 2.772s, episode steps: 101, steps per second: 36, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.238 [0.000, 5.000], mean observation: 0.253 [0.000, 24.000], loss: 441763328.000000, mean_absolute_error: 55640.312500, mean_q: 62121.617188
  224334/5000000: episode: 3261, duration: 0.659s, episode steps: 25, steps per second: 38, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.840 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 217827904.000000, mean_absolute_error: 58629.414062, mean_q: 65688.976562
  224362/5000000: episode: 3262, duration: 0.816s, episode steps: 28, steps per second: 34, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.357 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 390901728.000000, mean_absolute_error: 61399.582031, mean_q: 67748.539062
  224595/5000000: episode: 3263, duration: 6.869s, episode steps: 233, s

  226020/5000000: episode: 3287, duration: 1.884s, episode steps: 62, steps per second: 33, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.855 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 175033872.000000, mean_absolute_error: 52483.296875, mean_q: 57160.875000
  226077/5000000: episode: 3288, duration: 1.674s, episode steps: 57, steps per second: 34, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.491 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 108615040.000000, mean_absolute_error: 48518.871094, mean_q: 54874.675781
  226140/5000000: episode: 3289, duration: 1.631s, episode steps: 63, steps per second: 39, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.476 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 612379904.000000, mean_absolute_error: 58526.789062, mean_q: 60787.371094
  226205/5000000: episode: 3290, duration: 1.917s, episode steps: 65, ste

  227757/5000000: episode: 3314, duration: 1.395s, episode steps: 41, steps per second: 29, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.634 [0.000, 5.000], mean observation: 0.257 [0.000, 24.000], loss: 834269504.000000, mean_absolute_error: 68903.890625, mean_q: 76841.250000
  227822/5000000: episode: 3315, duration: 1.993s, episode steps: 65, steps per second: 33, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.031 [0.000, 5.000], mean observation: 0.265 [0.000, 24.000], loss: 1142282880.000000, mean_absolute_error: 50027.730469, mean_q: 57333.429688
  227891/5000000: episode: 3316, duration: 1.820s, episode steps: 69, steps per second: 38, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.217 [0.000, 5.000], mean observation: 0.301 [0.000, 24.000], loss: 2986362880.000000, mean_absolute_error: 58876.238281, mean_q: 67729.351562
  227944/5000000: episode: 3317, duration: 1.524s, episode steps: 53, s

  229459/5000000: episode: 3341, duration: 2.893s, episode steps: 97, steps per second: 34, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.402 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 398946528.000000, mean_absolute_error: 52316.632812, mean_q: 60734.082031
  229617/5000000: episode: 3342, duration: 4.817s, episode steps: 158, steps per second: 33, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.443 [0.000, 5.000], mean observation: 0.286 [0.000, 24.000], loss: 75187240.000000, mean_absolute_error: 49101.378906, mean_q: 57239.734375
  229645/5000000: episode: 3343, duration: 0.873s, episode steps: 28, steps per second: 32, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.357 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 70656376.000000, mean_absolute_error: 47081.523438, mean_q: 52804.625000
  229675/5000000: episode: 3344, duration: 1.002s, episode steps: 30, step

  231114/5000000: episode: 3368, duration: 0.916s, episode steps: 30, steps per second: 33, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.633 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 1118056576.000000, mean_absolute_error: 53992.390625, mean_q: 62138.492188
  231151/5000000: episode: 3369, duration: 1.056s, episode steps: 37, steps per second: 35, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.405 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 297808224.000000, mean_absolute_error: 56577.742188, mean_q: 63590.128906
  231182/5000000: episode: 3370, duration: 0.957s, episode steps: 31, steps per second: 32, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.161 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 146807520.000000, mean_absolute_error: 47112.113281, mean_q: 54579.304688
  231241/5000000: episode: 3371, duration: 1.632s, episode steps: 59, st

  232623/5000000: episode: 3395, duration: 2.642s, episode steps: 91, steps per second: 34, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.681 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 253506928.000000, mean_absolute_error: 60523.511719, mean_q: 64970.527344
  232675/5000000: episode: 3396, duration: 1.593s, episode steps: 52, steps per second: 33, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.750 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 41469244.000000, mean_absolute_error: 41155.515625, mean_q: 48186.957031
  232701/5000000: episode: 3397, duration: 0.720s, episode steps: 26, steps per second: 36, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.192 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 217202624.000000, mean_absolute_error: 43219.316406, mean_q: 49191.808594
  232732/5000000: episode: 3398, duration: 0.953s, episode steps: 31, step

  234090/5000000: episode: 3422, duration: 0.961s, episode steps: 29, steps per second: 30, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.690 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 85951928.000000, mean_absolute_error: 59295.375000, mean_q: 64259.191406
  234149/5000000: episode: 3423, duration: 1.764s, episode steps: 59, steps per second: 33, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 1.949 [0.000, 5.000], mean observation: 0.262 [0.000, 24.000], loss: 243180928.000000, mean_absolute_error: 52402.667969, mean_q: 60527.972656
  234174/5000000: episode: 3424, duration: 0.813s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 200571440.000000, mean_absolute_error: 64017.613281, mean_q: 68746.773438
  234228/5000000: episode: 3425, duration: 1.695s, episode steps: 54, step

  235408/5000000: episode: 3449, duration: 3.461s, episode steps: 111, steps per second: 32, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.423 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 82997744.000000, mean_absolute_error: 50945.109375, mean_q: 58085.179688
  235433/5000000: episode: 3450, duration: 0.825s, episode steps: 25, steps per second: 30, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.520 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 24969004.000000, mean_absolute_error: 47495.308594, mean_q: 54463.250000
  235518/5000000: episode: 3451, duration: 2.543s, episode steps: 85, steps per second: 33, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.882 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 192051536.000000, mean_absolute_error: 45606.320312, mean_q: 52404.777344
  235571/5000000: episode: 3452, duration: 1.496s, episode steps: 53, step

  236909/5000000: episode: 3476, duration: 2.601s, episode steps: 83, steps per second: 32, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.590 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 305529600.000000, mean_absolute_error: 61103.675781, mean_q: 65070.656250
  237002/5000000: episode: 3477, duration: 2.792s, episode steps: 93, steps per second: 33, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.613 [0.000, 5.000], mean observation: 0.267 [0.000, 24.000], loss: 300656064.000000, mean_absolute_error: 43553.867188, mean_q: 50615.125000
  237030/5000000: episode: 3478, duration: 0.794s, episode steps: 28, steps per second: 35, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.964 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 1018820224.000000, mean_absolute_error: 62012.105469, mean_q: 68312.187500
  237058/5000000: episode: 3479, duration: 0.876s, episode steps: 28, st

  238665/5000000: episode: 3503, duration: 6.647s, episode steps: 225, steps per second: 34, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.350 [0.000, 24.000], loss: 635375872.000000, mean_absolute_error: 54565.332031, mean_q: 60247.910156
  238692/5000000: episode: 3504, duration: 0.776s, episode steps: 27, steps per second: 35, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.370 [0.000, 5.000], mean observation: 0.235 [0.000, 24.000], loss: 233726384.000000, mean_absolute_error: 40638.632812, mean_q: 48857.949219
  238797/5000000: episode: 3505, duration: 3.178s, episode steps: 105, steps per second: 33, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.362 [0.000, 5.000], mean observation: 0.283 [0.000, 24.000], loss: 177991872.000000, mean_absolute_error: 42534.398438, mean_q: 50690.890625
  239003/5000000: episode: 3506, duration: 6.034s, episode steps: 206, 

  240228/5000000: episode: 3530, duration: 0.804s, episode steps: 26, steps per second: 32, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.308 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 515561696.000000, mean_absolute_error: 73319.468750, mean_q: 81631.453125
  240287/5000000: episode: 3531, duration: 1.869s, episode steps: 59, steps per second: 32, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.525 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 147719408.000000, mean_absolute_error: 51611.257812, mean_q: 60274.753906
  240313/5000000: episode: 3532, duration: 0.802s, episode steps: 26, steps per second: 32, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 316505216.000000, mean_absolute_error: 69497.210938, mean_q: 74139.117188
  240340/5000000: episode: 3533, duration: 0.870s, episode steps: 27, ste

  241541/5000000: episode: 3557, duration: 1.556s, episode steps: 52, steps per second: 33, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.288 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 150030672.000000, mean_absolute_error: 42355.203125, mean_q: 50135.402344
  241683/5000000: episode: 3558, duration: 4.278s, episode steps: 142, steps per second: 33, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.732 [0.000, 5.000], mean observation: 0.264 [0.000, 24.000], loss: 939998336.000000, mean_absolute_error: 55367.785156, mean_q: 60530.113281
  241747/5000000: episode: 3559, duration: 2.044s, episode steps: 64, steps per second: 31, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.297 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 159180272.000000, mean_absolute_error: 59055.765625, mean_q: 65770.250000
  241833/5000000: episode: 3560, duration: 2.642s, episode steps: 86, st

  243447/5000000: episode: 3584, duration: 3.231s, episode steps: 106, steps per second: 33, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.208 [0.000, 5.000], mean observation: 0.233 [0.000, 24.000], loss: 296347680.000000, mean_absolute_error: 52928.652344, mean_q: 61546.839844
  243497/5000000: episode: 3585, duration: 1.607s, episode steps: 50, steps per second: 31, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 138824032.000000, mean_absolute_error: 54007.144531, mean_q: 60045.511719
  243522/5000000: episode: 3586, duration: 0.768s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.280 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 261455776.000000, mean_absolute_error: 56377.050781, mean_q: 62810.609375
  243548/5000000: episode: 3587, duration: 0.753s, episode steps: 26, st

  245177/5000000: episode: 3611, duration: 0.984s, episode steps: 32, steps per second: 33, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.312 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 139782688.000000, mean_absolute_error: 58319.878906, mean_q: 66808.882812
  245202/5000000: episode: 3612, duration: 0.800s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.400 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 141413936.000000, mean_absolute_error: 45621.828125, mean_q: 53335.875000
  245239/5000000: episode: 3613, duration: 1.241s, episode steps: 37, steps per second: 30, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.649 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 113347560.000000, mean_absolute_error: 51515.218750, mean_q: 58407.824219
  245266/5000000: episode: 3614, duration: 0.771s, episode steps: 27, ste

  246855/5000000: episode: 3638, duration: 0.986s, episode steps: 31, steps per second: 31, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.645 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 372862336.000000, mean_absolute_error: 74695.960938, mean_q: 80422.429688
  246932/5000000: episode: 3639, duration: 2.576s, episode steps: 77, steps per second: 30, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.130 [0.000, 5.000], mean observation: 0.265 [0.000, 24.000], loss: 81344368.000000, mean_absolute_error: 45202.027344, mean_q: 52177.675781
  247029/5000000: episode: 3640, duration: 2.915s, episode steps: 97, steps per second: 33, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.443 [0.000, 5.000], mean observation: 0.277 [0.000, 24.000], loss: 872274880.000000, mean_absolute_error: 72036.718750, mean_q: 78947.921875
  247131/5000000: episode: 3641, duration: 3.233s, episode steps: 102, ste

  248701/5000000: episode: 3665, duration: 1.683s, episode steps: 60, steps per second: 36, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.317 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 239992896.000000, mean_absolute_error: 73120.406250, mean_q: 79977.726562
  248761/5000000: episode: 3666, duration: 1.874s, episode steps: 60, steps per second: 32, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.517 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 152756416.000000, mean_absolute_error: 62096.445312, mean_q: 71109.164062
  248786/5000000: episode: 3667, duration: 0.804s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.720 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 168152208.000000, mean_absolute_error: 44205.406250, mean_q: 51731.449219
  248827/5000000: episode: 3668, duration: 1.136s, episode steps: 41, ste

  250183/5000000: episode: 3692, duration: 1.967s, episode steps: 66, steps per second: 34, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.106 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 257980608.000000, mean_absolute_error: 63410.562500, mean_q: 69408.335938
  250214/5000000: episode: 3693, duration: 0.921s, episode steps: 31, steps per second: 34, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.097 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 416659360.000000, mean_absolute_error: 70890.820312, mean_q: 77590.320312
  250240/5000000: episode: 3694, duration: 0.751s, episode steps: 26, steps per second: 35, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 1637353984.000000, mean_absolute_error: 80203.507812, mean_q: 85127.531250
  250270/5000000: episode: 3695, duration: 0.860s, episode steps: 30, st

  251535/5000000: episode: 3719, duration: 0.751s, episode steps: 26, steps per second: 35, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.808 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 3534407168.000000, mean_absolute_error: 69074.453125, mean_q: 79462.398438
  251563/5000000: episode: 3720, duration: 0.835s, episode steps: 28, steps per second: 34, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.393 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 225449232.000000, mean_absolute_error: 43900.570312, mean_q: 51857.152344
  251619/5000000: episode: 3721, duration: 1.661s, episode steps: 56, steps per second: 34, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.196 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 774934336.000000, mean_absolute_error: 58918.824219, mean_q: 66018.078125
  251646/5000000: episode: 3722, duration: 0.816s, episode steps: 27, st

  253408/5000000: episode: 3746, duration: 11.034s, episode steps: 350, steps per second: 32, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.397 [0.000, 5.000], mean observation: 0.283 [0.000, 24.000], loss: 422285248.000000, mean_absolute_error: 54330.707031, mean_q: 62643.941406
  253434/5000000: episode: 3747, duration: 0.886s, episode steps: 26, steps per second: 29, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.077 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 527302272.000000, mean_absolute_error: 59932.832031, mean_q: 67748.593750
  253464/5000000: episode: 3748, duration: 0.941s, episode steps: 30, steps per second: 32, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.733 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 63174312.000000, mean_absolute_error: 58611.675781, mean_q: 64555.976562
  253626/5000000: episode: 3749, duration: 5.059s, episode steps: 162, s

  254951/5000000: episode: 3773, duration: 2.518s, episode steps: 84, steps per second: 33, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.607 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 918321664.000000, mean_absolute_error: 62340.828125, mean_q: 67325.601562
  254977/5000000: episode: 3774, duration: 0.788s, episode steps: 26, steps per second: 33, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.385 [0.000, 5.000], mean observation: 0.233 [0.000, 24.000], loss: 294555328.000000, mean_absolute_error: 52904.117188, mean_q: 61870.902344
  255004/5000000: episode: 3775, duration: 19.464s, episode steps: 27, steps per second: 1, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.630 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 645883520.000000, mean_absolute_error: 91487.242188, mean_q: 93222.507812
  255044/5000000: episode: 3776, duration: 1.210s, episode steps: 40, ste

  256425/5000000: episode: 3800, duration: 0.891s, episode steps: 26, steps per second: 29, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.192 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 311229120.000000, mean_absolute_error: 64425.117188, mean_q: 69934.640625
  256450/5000000: episode: 3801, duration: 0.803s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.400 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 321830944.000000, mean_absolute_error: 56732.355469, mean_q: 61562.289062
  256481/5000000: episode: 3802, duration: 1.006s, episode steps: 31, steps per second: 31, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 335181536.000000, mean_absolute_error: 47434.007812, mean_q: 57517.957031
  256506/5000000: episode: 3803, duration: 0.839s, episode steps: 25, ste

  257878/5000000: episode: 3827, duration: 3.471s, episode steps: 112, steps per second: 32, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.438 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 337530688.000000, mean_absolute_error: 50305.031250, mean_q: 56998.851562
  257966/5000000: episode: 3828, duration: 2.756s, episode steps: 88, steps per second: 32, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.216 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 653451008.000000, mean_absolute_error: 41298.601562, mean_q: 47387.292969
  258052/5000000: episode: 3829, duration: 2.883s, episode steps: 86, steps per second: 30, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.674 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 657509760.000000, mean_absolute_error: 42842.246094, mean_q: 50245.527344
  258077/5000000: episode: 3830, duration: 0.789s, episode steps: 25, st

  259265/5000000: episode: 3854, duration: 0.858s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.346 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 188978080.000000, mean_absolute_error: 62113.085938, mean_q: 68545.492188
  259293/5000000: episode: 3855, duration: 0.781s, episode steps: 28, steps per second: 36, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.393 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 317659232.000000, mean_absolute_error: 58828.269531, mean_q: 68336.242188
  259327/5000000: episode: 3856, duration: 1.063s, episode steps: 34, steps per second: 32, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.441 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 69901184.000000, mean_absolute_error: 48928.730469, mean_q: 58298.195312
  259357/5000000: episode: 3857, duration: 0.831s, episode steps: 30, step

  260826/5000000: episode: 3881, duration: 4.557s, episode steps: 153, steps per second: 34, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.621 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 454182784.000000, mean_absolute_error: 59556.050781, mean_q: 65888.742188
  260894/5000000: episode: 3882, duration: 1.923s, episode steps: 68, steps per second: 35, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.088 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 435641504.000000, mean_absolute_error: 55380.152344, mean_q: 62661.226562
  260956/5000000: episode: 3883, duration: 1.890s, episode steps: 62, steps per second: 33, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.468 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 193151680.000000, mean_absolute_error: 44181.097656, mean_q: 52114.164062
  261020/5000000: episode: 3884, duration: 1.968s, episode steps: 64, st

  262717/5000000: episode: 3908, duration: 3.292s, episode steps: 113, steps per second: 34, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.469 [0.000, 5.000], mean observation: 0.284 [0.000, 24.000], loss: 1130896512.000000, mean_absolute_error: 54768.046875, mean_q: 63745.863281
  262772/5000000: episode: 3909, duration: 1.631s, episode steps: 55, steps per second: 34, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.273 [0.000, 5.000], mean observation: 0.263 [0.000, 24.000], loss: 143031888.000000, mean_absolute_error: 45939.738281, mean_q: 55083.531250
  262835/5000000: episode: 3910, duration: 1.932s, episode steps: 63, steps per second: 33, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.556 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 502156064.000000, mean_absolute_error: 56375.921875, mean_q: 65680.914062
  262862/5000000: episode: 3911, duration: 0.855s, episode steps: 27, s

  264353/5000000: episode: 3935, duration: 0.944s, episode steps: 29, steps per second: 31, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.276 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 206745312.000000, mean_absolute_error: 69653.062500, mean_q: 75260.398438
  264378/5000000: episode: 3936, duration: 0.787s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.880 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 126025504.000000, mean_absolute_error: 53857.054688, mean_q: 65576.968750
  264489/5000000: episode: 3937, duration: 3.458s, episode steps: 111, steps per second: 32, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.256 [0.000, 24.000], loss: 555410624.000000, mean_absolute_error: 69221.898438, mean_q: 76264.601562
  264516/5000000: episode: 3938, duration: 0.732s, episode steps: 27, st

  266292/5000000: episode: 3962, duration: 0.984s, episode steps: 30, steps per second: 30, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 368898880.000000, mean_absolute_error: 70631.703125, mean_q: 76345.132812
  266360/5000000: episode: 3963, duration: 2.209s, episode steps: 68, steps per second: 31, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.632 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 551807808.000000, mean_absolute_error: 72779.101562, mean_q: 80921.601562
  266389/5000000: episode: 3964, duration: 0.943s, episode steps: 29, steps per second: 31, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 1.931 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 362658176.000000, mean_absolute_error: 68896.015625, mean_q: 77440.492188
  266419/5000000: episode: 3965, duration: 0.999s, episode steps: 30, ste

  267588/5000000: episode: 3989, duration: 1.653s, episode steps: 56, steps per second: 34, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.071 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 547686848.000000, mean_absolute_error: 61138.750000, mean_q: 66576.070312
  267627/5000000: episode: 3990, duration: 1.268s, episode steps: 39, steps per second: 31, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 76092152.000000, mean_absolute_error: 45400.128906, mean_q: 51828.621094
  267656/5000000: episode: 3991, duration: 0.845s, episode steps: 29, steps per second: 34, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.552 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 194109680.000000, mean_absolute_error: 54131.003906, mean_q: 59993.050781
  267690/5000000: episode: 3992, duration: 1.026s, episode steps: 34, step

  269286/5000000: episode: 4016, duration: 0.917s, episode steps: 33, steps per second: 36, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.424 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 1183876096.000000, mean_absolute_error: 57587.527344, mean_q: 64542.410156
  269349/5000000: episode: 4017, duration: 2.084s, episode steps: 63, steps per second: 30, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 3.063 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 2892058112.000000, mean_absolute_error: 85333.203125, mean_q: 92322.992188
  269494/5000000: episode: 4018, duration: 4.283s, episode steps: 145, steps per second: 34, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.703 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 127110432.000000, mean_absolute_error: 45865.800781, mean_q: 50981.488281
  269519/5000000: episode: 4019, duration: 0.716s, episode steps: 25, 

  270957/5000000: episode: 4043, duration: 1.623s, episode steps: 54, steps per second: 33, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.407 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 4197547008.000000, mean_absolute_error: 71022.070312, mean_q: 75440.460938
  271050/5000000: episode: 4044, duration: 2.646s, episode steps: 93, steps per second: 35, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.289 [0.000, 24.000], loss: 2246107904.000000, mean_absolute_error: 69221.492188, mean_q: 77020.835938
  271118/5000000: episode: 4045, duration: 1.950s, episode steps: 68, steps per second: 35, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.882 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 922041728.000000, mean_absolute_error: 73914.312500, mean_q: 79530.070312
  271171/5000000: episode: 4046, duration: 1.571s, episode steps: 53, s

  272547/5000000: episode: 4070, duration: 3.054s, episode steps: 95, steps per second: 31, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.421 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 2509240576.000000, mean_absolute_error: 57327.000000, mean_q: 64639.679688
  272601/5000000: episode: 4071, duration: 1.604s, episode steps: 54, steps per second: 34, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.833 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 1317166592.000000, mean_absolute_error: 78241.835938, mean_q: 81643.390625
  272653/5000000: episode: 4072, duration: 1.542s, episode steps: 52, steps per second: 34, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.365 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 348510496.000000, mean_absolute_error: 44293.937500, mean_q: 50406.902344
  272679/5000000: episode: 4073, duration: 0.789s, episode steps: 26, s

  274673/5000000: episode: 4097, duration: 0.922s, episode steps: 31, steps per second: 34, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.323 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 608302016.000000, mean_absolute_error: 36317.207031, mean_q: 42581.121094
  274791/5000000: episode: 4098, duration: 3.603s, episode steps: 118, steps per second: 33, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.119 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 641331776.000000, mean_absolute_error: 76637.210938, mean_q: 77855.273438
  274818/5000000: episode: 4099, duration: 0.747s, episode steps: 27, steps per second: 36, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.148 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 64306044.000000, mean_absolute_error: 42080.066406, mean_q: 48634.757812
  274923/5000000: episode: 4100, duration: 3.024s, episode steps: 105, st

  276189/5000000: episode: 4124, duration: 0.762s, episode steps: 26, steps per second: 34, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.192 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 1065215296.000000, mean_absolute_error: 76564.343750, mean_q: 78105.257812
  276218/5000000: episode: 4125, duration: 0.795s, episode steps: 29, steps per second: 36, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.931 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 251665920.000000, mean_absolute_error: 62125.988281, mean_q: 69786.226562
  276277/5000000: episode: 4126, duration: 1.685s, episode steps: 59, steps per second: 35, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.254 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 995786304.000000, mean_absolute_error: 115573.203125, mean_q: 113052.398438
  276304/5000000: episode: 4127, duration: 0.906s, episode steps: 27, 

  277866/5000000: episode: 4151, duration: 1.005s, episode steps: 30, steps per second: 30, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.967 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 367412928.000000, mean_absolute_error: 130310.085938, mean_q: 123582.109375
  277893/5000000: episode: 4152, duration: 0.939s, episode steps: 27, steps per second: 29, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 43702692.000000, mean_absolute_error: 49958.906250, mean_q: 58218.441406
  277922/5000000: episode: 4153, duration: 1.010s, episode steps: 29, steps per second: 29, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.069 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 905117888.000000, mean_absolute_error: 93515.445312, mean_q: 94654.148438
  277948/5000000: episode: 4154, duration: 0.852s, episode steps: 26, st

  279046/5000000: episode: 4178, duration: 1.994s, episode steps: 63, steps per second: 32, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.190 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 975617088.000000, mean_absolute_error: 101856.367188, mean_q: 105321.796875
  279093/5000000: episode: 4179, duration: 1.394s, episode steps: 47, steps per second: 34, episode reward: -1.000, mean reward: -0.021 [-1.000, 0.000], mean action: 2.234 [0.000, 5.000], mean observation: 0.260 [0.000, 24.000], loss: 359240480.000000, mean_absolute_error: 59119.164062, mean_q: 68874.421875
  279124/5000000: episode: 4180, duration: 0.967s, episode steps: 31, steps per second: 32, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.452 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 10844478464.000000, mean_absolute_error: 111653.664062, mean_q: 113562.937500
  279228/5000000: episode: 4181, duration: 3.300s, episode steps: 1

  280219/5000000: episode: 4205, duration: 0.734s, episode steps: 25, steps per second: 34, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.235 [0.000, 24.000], loss: 392076320.000000, mean_absolute_error: 122263.890625, mean_q: 119270.343750
  280244/5000000: episode: 4206, duration: 0.786s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 2684416512.000000, mean_absolute_error: 92681.812500, mean_q: 94056.062500
  280298/5000000: episode: 4207, duration: 1.674s, episode steps: 54, steps per second: 32, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.759 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 1396060160.000000, mean_absolute_error: 110098.304688, mean_q: 111576.632812
  280329/5000000: episode: 4208, duration: 0.950s, episode steps: 3

  282037/5000000: episode: 4232, duration: 1.987s, episode steps: 81, steps per second: 41, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.358 [0.000, 5.000], mean observation: 0.274 [0.000, 24.000], loss: 454667488.000000, mean_absolute_error: 78364.414062, mean_q: 82626.070312
  282098/5000000: episode: 4233, duration: 1.748s, episode steps: 61, steps per second: 35, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.459 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 652672960.000000, mean_absolute_error: 80131.757812, mean_q: 82386.570312
  282171/5000000: episode: 4234, duration: 2.165s, episode steps: 73, steps per second: 34, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.247 [0.000, 5.000], mean observation: 0.270 [0.000, 24.000], loss: 223485760.000000, mean_absolute_error: 61750.226562, mean_q: 69553.250000
  282197/5000000: episode: 4235, duration: 0.840s, episode steps: 26, ste

  283385/5000000: episode: 4259, duration: 0.773s, episode steps: 29, steps per second: 38, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.414 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 4262977536.000000, mean_absolute_error: 135835.687500, mean_q: 135613.968750
  283414/5000000: episode: 4260, duration: 0.908s, episode steps: 29, steps per second: 32, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.552 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 1061731904.000000, mean_absolute_error: 94928.234375, mean_q: 104946.554688
  283445/5000000: episode: 4261, duration: 0.931s, episode steps: 31, steps per second: 33, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.548 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 2551432192.000000, mean_absolute_error: 87573.546875, mean_q: 92264.476562
  283623/5000000: episode: 4262, duration: 5.590s, episode steps: 1

  284906/5000000: episode: 4286, duration: 0.955s, episode steps: 29, steps per second: 30, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.448 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 326444704.000000, mean_absolute_error: 83763.187500, mean_q: 89344.718750
  284933/5000000: episode: 4287, duration: 0.782s, episode steps: 27, steps per second: 35, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.259 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 618819840.000000, mean_absolute_error: 80583.242188, mean_q: 88754.187500
  284961/5000000: episode: 4288, duration: 0.878s, episode steps: 28, steps per second: 32, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.250 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 139523808.000000, mean_absolute_error: 82897.187500, mean_q: 90698.515625
  284987/5000000: episode: 4289, duration: 0.754s, episode steps: 26, ste

  286322/5000000: episode: 4313, duration: 2.540s, episode steps: 88, steps per second: 35, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.420 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 518363360.000000, mean_absolute_error: 74710.242188, mean_q: 78558.093750
  286438/5000000: episode: 4314, duration: 3.460s, episode steps: 116, steps per second: 34, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.379 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 808468736.000000, mean_absolute_error: 78777.570312, mean_q: 84458.726562
  286463/5000000: episode: 4315, duration: 0.775s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.232 [0.000, 24.000], loss: 1627678976.000000, mean_absolute_error: 99210.179688, mean_q: 105868.140625
  286729/5000000: episode: 4316, duration: 8.170s, episode steps: 266,

  288224/5000000: episode: 4340, duration: 4.844s, episode steps: 156, steps per second: 32, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.449 [0.000, 5.000], mean observation: 0.306 [0.000, 24.000], loss: 466948160.000000, mean_absolute_error: 90541.718750, mean_q: 96109.789062
  288283/5000000: episode: 4341, duration: 1.799s, episode steps: 59, steps per second: 33, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.492 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 502290496.000000, mean_absolute_error: 81638.968750, mean_q: 90222.289062
  288311/5000000: episode: 4342, duration: 0.828s, episode steps: 28, steps per second: 34, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.679 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 1355421440.000000, mean_absolute_error: 113772.078125, mean_q: 110587.492188
  288337/5000000: episode: 4343, duration: 0.951s, episode steps: 26,

  289696/5000000: episode: 4367, duration: 0.676s, episode steps: 25, steps per second: 37, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.200 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 3445660672.000000, mean_absolute_error: 169032.640625, mean_q: 159380.875000
  289857/5000000: episode: 4368, duration: 4.936s, episode steps: 161, steps per second: 33, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.391 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 2012435072.000000, mean_absolute_error: 89649.523438, mean_q: 95145.562500
  289930/5000000: episode: 4369, duration: 2.065s, episode steps: 73, steps per second: 35, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.507 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 611667712.000000, mean_absolute_error: 73990.437500, mean_q: 80795.867188
  289960/5000000: episode: 4370, duration: 0.891s, episode steps: 30

  291389/5000000: episode: 4394, duration: 2.572s, episode steps: 88, steps per second: 34, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.830 [0.000, 5.000], mean observation: 0.253 [0.000, 24.000], loss: 1567432832.000000, mean_absolute_error: 116856.976562, mean_q: 118681.382812
  291452/5000000: episode: 4395, duration: 1.924s, episode steps: 63, steps per second: 33, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.635 [0.000, 5.000], mean observation: 0.276 [0.000, 24.000], loss: 399425216.000000, mean_absolute_error: 110527.695312, mean_q: 110998.937500
  291522/5000000: episode: 4396, duration: 2.212s, episode steps: 70, steps per second: 32, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.186 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 143932608.000000, mean_absolute_error: 84535.421875, mean_q: 88242.304688
  291548/5000000: episode: 4397, duration: 0.775s, episode steps: 26

  292775/5000000: episode: 4421, duration: 2.337s, episode steps: 72, steps per second: 31, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 1.958 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 1206190720.000000, mean_absolute_error: 86684.132812, mean_q: 89889.695312
  292832/5000000: episode: 4422, duration: 1.672s, episode steps: 57, steps per second: 34, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.860 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 4026083840.000000, mean_absolute_error: 104473.500000, mean_q: 104149.476562
  292895/5000000: episode: 4423, duration: 1.985s, episode steps: 63, steps per second: 32, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.683 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 714048512.000000, mean_absolute_error: 79425.156250, mean_q: 88575.789062
  293019/5000000: episode: 4424, duration: 3.993s, episode steps: 124

  294491/5000000: episode: 4448, duration: 3.430s, episode steps: 103, steps per second: 30, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.621 [0.000, 5.000], mean observation: 0.281 [0.000, 24.000], loss: 2501629184.000000, mean_absolute_error: 78615.515625, mean_q: 79914.578125
  294518/5000000: episode: 4449, duration: 0.856s, episode steps: 27, steps per second: 32, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 3.111 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 731731840.000000, mean_absolute_error: 59685.195312, mean_q: 69369.187500
  294580/5000000: episode: 4450, duration: 2.046s, episode steps: 62, steps per second: 30, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.403 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 551187776.000000, mean_absolute_error: 65776.804688, mean_q: 77802.968750
  294627/5000000: episode: 4451, duration: 1.525s, episode steps: 47, s

  296109/5000000: episode: 4475, duration: 2.068s, episode steps: 69, steps per second: 33, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.391 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 425575808.000000, mean_absolute_error: 79516.804688, mean_q: 86152.046875
  296136/5000000: episode: 4476, duration: 0.846s, episode steps: 27, steps per second: 32, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.148 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 681375232.000000, mean_absolute_error: 72952.320312, mean_q: 79469.679688
  296167/5000000: episode: 4477, duration: 1.052s, episode steps: 31, steps per second: 29, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 1930514304.000000, mean_absolute_error: 97084.906250, mean_q: 101591.953125
  296196/5000000: episode: 4478, duration: 0.932s, episode steps: 29, s

  297808/5000000: episode: 4502, duration: 0.889s, episode steps: 29, steps per second: 33, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.586 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 2212249856.000000, mean_absolute_error: 157691.812500, mean_q: 154836.609375
  297836/5000000: episode: 4503, duration: 1.049s, episode steps: 28, steps per second: 27, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.786 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 6724289024.000000, mean_absolute_error: 77477.367188, mean_q: 80895.546875
  297865/5000000: episode: 4504, duration: 0.836s, episode steps: 29, steps per second: 35, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.483 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 1600562176.000000, mean_absolute_error: 69451.765625, mean_q: 74186.296875
  298011/5000000: episode: 4505, duration: 4.478s, episode steps: 14

  299808/5000000: episode: 4529, duration: 5.361s, episode steps: 176, steps per second: 33, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.386 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 2144240768.000000, mean_absolute_error: 88464.546875, mean_q: 89557.546875
  299833/5000000: episode: 4530, duration: 0.684s, episode steps: 25, steps per second: 37, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.400 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 991226176.000000, mean_absolute_error: 60091.128906, mean_q: 66444.953125
  299886/5000000: episode: 4531, duration: 1.420s, episode steps: 53, steps per second: 37, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.528 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 722249472.000000, mean_absolute_error: 76001.218750, mean_q: 80293.179688
  299913/5000000: episode: 4532, duration: 0.860s, episode steps: 27, s

  301357/5000000: episode: 4556, duration: 1.163s, episode steps: 36, steps per second: 31, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.778 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 4301719552.000000, mean_absolute_error: 83285.414062, mean_q: 86833.523438
  301425/5000000: episode: 4557, duration: 2.057s, episode steps: 68, steps per second: 33, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.382 [0.000, 5.000], mean observation: 0.278 [0.000, 24.000], loss: 2257753088.000000, mean_absolute_error: 89974.539062, mean_q: 93457.992188
  301512/5000000: episode: 4558, duration: 2.635s, episode steps: 87, steps per second: 33, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.586 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 581997888.000000, mean_absolute_error: 65095.101562, mean_q: 70381.921875
  301537/5000000: episode: 4559, duration: 0.683s, episode steps: 25, s

  302824/5000000: episode: 4583, duration: 1.207s, episode steps: 41, steps per second: 34, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.073 [0.000, 5.000], mean observation: 0.260 [0.000, 24.000], loss: 4865448448.000000, mean_absolute_error: 97488.250000, mean_q: 106151.539062
  302887/5000000: episode: 4584, duration: 1.988s, episode steps: 63, steps per second: 32, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.349 [0.000, 5.000], mean observation: 0.270 [0.000, 24.000], loss: 2180712448.000000, mean_absolute_error: 68465.695312, mean_q: 77939.281250
  302914/5000000: episode: 4585, duration: 0.777s, episode steps: 27, steps per second: 35, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.519 [0.000, 5.000], mean observation: 0.235 [0.000, 24.000], loss: 206075840.000000, mean_absolute_error: 93281.835938, mean_q: 100460.945312
  303128/5000000: episode: 4586, duration: 24.413s, episode steps: 21

  304796/5000000: episode: 4610, duration: 0.848s, episode steps: 25, steps per second: 29, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 19388921856.000000, mean_absolute_error: 57325.558594, mean_q: 61192.218750
  304840/5000000: episode: 4611, duration: 1.485s, episode steps: 44, steps per second: 30, episode reward: -1.000, mean reward: -0.023 [-1.000, 0.000], mean action: 2.136 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 1208195968.000000, mean_absolute_error: 59051.964844, mean_q: 66479.656250
  304917/5000000: episode: 4612, duration: 2.416s, episode steps: 77, steps per second: 32, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 859432320.000000, mean_absolute_error: 100808.304688, mean_q: 100887.703125
  305006/5000000: episode: 4613, duration: 2.629s, episode steps: 89

  306396/5000000: episode: 4637, duration: 0.857s, episode steps: 27, steps per second: 32, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.222 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 3064836352.000000, mean_absolute_error: 122896.367188, mean_q: 123470.507812
  306421/5000000: episode: 4638, duration: 0.804s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 497166208.000000, mean_absolute_error: 62899.109375, mean_q: 70992.765625
  306498/5000000: episode: 4639, duration: 2.152s, episode steps: 77, steps per second: 36, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.455 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 764195008.000000, mean_absolute_error: 96808.468750, mean_q: 100323.906250
  306548/5000000: episode: 4640, duration: 1.545s, episode steps: 50,

  308150/5000000: episode: 4664, duration: 0.868s, episode steps: 27, steps per second: 31, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.593 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 33132236.000000, mean_absolute_error: 61244.878906, mean_q: 70796.414062
  308292/5000000: episode: 4665, duration: 3.930s, episode steps: 142, steps per second: 36, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.592 [0.000, 5.000], mean observation: 0.268 [0.000, 24.000], loss: 919001344.000000, mean_absolute_error: 88751.320312, mean_q: 92701.875000
  308320/5000000: episode: 4666, duration: 0.915s, episode steps: 28, steps per second: 31, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.250 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 1219130880.000000, mean_absolute_error: 105466.546875, mean_q: 110194.859375
  308422/5000000: episode: 4667, duration: 2.857s, episode steps: 102,

  309923/5000000: episode: 4691, duration: 1.453s, episode steps: 52, steps per second: 36, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.385 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 2345073664.000000, mean_absolute_error: 102043.507812, mean_q: 116481.992188
  309975/5000000: episode: 4692, duration: 1.510s, episode steps: 52, steps per second: 34, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.519 [0.000, 5.000], mean observation: 0.233 [0.000, 24.000], loss: 576123520.000000, mean_absolute_error: 81617.593750, mean_q: 90704.234375
  310003/5000000: episode: 4693, duration: 0.921s, episode steps: 28, steps per second: 30, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.143 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 685169216.000000, mean_absolute_error: 124147.460938, mean_q: 122111.773438
  310054/5000000: episode: 4694, duration: 1.327s, episode steps: 51

  311227/5000000: episode: 4718, duration: 2.463s, episode steps: 76, steps per second: 31, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.566 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 1937531776.000000, mean_absolute_error: 87654.765625, mean_q: 91005.382812
  311259/5000000: episode: 4719, duration: 0.926s, episode steps: 32, steps per second: 35, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.344 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 577410496.000000, mean_absolute_error: 63623.234375, mean_q: 73726.140625
  311286/5000000: episode: 4720, duration: 0.799s, episode steps: 27, steps per second: 34, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 51133948.000000, mean_absolute_error: 90371.109375, mean_q: 92712.750000
  311318/5000000: episode: 4721, duration: 1.038s, episode steps: 32, ste

  312639/5000000: episode: 4745, duration: 1.720s, episode steps: 58, steps per second: 34, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.552 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 203491600.000000, mean_absolute_error: 71312.335938, mean_q: 79583.859375
  312718/5000000: episode: 4746, duration: 2.213s, episode steps: 79, steps per second: 36, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.696 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 11791220736.000000, mean_absolute_error: 65356.593750, mean_q: 72774.328125
  312790/5000000: episode: 4747, duration: 2.155s, episode steps: 72, steps per second: 33, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.194 [0.000, 5.000], mean observation: 0.274 [0.000, 24.000], loss: 1883689216.000000, mean_absolute_error: 65196.070312, mean_q: 74784.320312
  312817/5000000: episode: 4748, duration: 0.823s, episode steps: 27, 

  314298/5000000: episode: 4772, duration: 0.842s, episode steps: 25, steps per second: 30, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.400 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 689935872.000000, mean_absolute_error: 67074.656250, mean_q: 71954.726562
  314383/5000000: episode: 4773, duration: 2.725s, episode steps: 85, steps per second: 31, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.224 [0.000, 5.000], mean observation: 0.257 [0.000, 24.000], loss: 1599171072.000000, mean_absolute_error: 59810.429688, mean_q: 66059.539062
  314506/5000000: episode: 4774, duration: 4.083s, episode steps: 123, steps per second: 30, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.260 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 3607651840.000000, mean_absolute_error: 86386.437500, mean_q: 89243.890625
  314533/5000000: episode: 4775, duration: 0.827s, episode steps: 27, 

  315689/5000000: episode: 4799, duration: 5.650s, episode steps: 185, steps per second: 33, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.541 [0.000, 5.000], mean observation: 0.265 [0.000, 24.000], loss: 10572324864.000000, mean_absolute_error: 79363.164062, mean_q: 83099.476562
  315850/5000000: episode: 4800, duration: 4.824s, episode steps: 161, steps per second: 33, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.453 [0.000, 5.000], mean observation: 0.266 [0.000, 24.000], loss: 1906737408.000000, mean_absolute_error: 97920.000000, mean_q: 97925.539062
  315878/5000000: episode: 4801, duration: 0.832s, episode steps: 28, steps per second: 34, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 4970109440.000000, mean_absolute_error: 102612.328125, mean_q: 102365.914062
  315938/5000000: episode: 4802, duration: 1.755s, episode steps:

  317494/5000000: episode: 4826, duration: 1.707s, episode steps: 53, steps per second: 31, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 3.057 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 292455776.000000, mean_absolute_error: 56904.949219, mean_q: 65168.996094
  317553/5000000: episode: 4827, duration: 1.704s, episode steps: 59, steps per second: 35, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.288 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 326357088.000000, mean_absolute_error: 71599.984375, mean_q: 75513.937500
  317714/5000000: episode: 4828, duration: 4.963s, episode steps: 161, steps per second: 32, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.578 [0.000, 5.000], mean observation: 0.263 [0.000, 24.000], loss: 1879547264.000000, mean_absolute_error: 81716.695312, mean_q: 83271.109375
  317766/5000000: episode: 4829, duration: 1.656s, episode steps: 52, s

  319356/5000000: episode: 4853, duration: 2.381s, episode steps: 88, steps per second: 37, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.375 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 1489463680.000000, mean_absolute_error: 67122.320312, mean_q: 73860.234375
  319410/5000000: episode: 4854, duration: 1.641s, episode steps: 54, steps per second: 33, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 1569042560.000000, mean_absolute_error: 79503.804688, mean_q: 82186.000000
  319492/5000000: episode: 4855, duration: 2.282s, episode steps: 82, steps per second: 36, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.634 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 250708160.000000, mean_absolute_error: 51532.769531, mean_q: 57838.140625
  319602/5000000: episode: 4856, duration: 3.542s, episode steps: 110, 

  321132/5000000: episode: 4880, duration: 2.331s, episode steps: 86, steps per second: 37, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.709 [0.000, 5.000], mean observation: 0.277 [0.000, 24.000], loss: 31557384.000000, mean_absolute_error: 51155.824219, mean_q: 55303.250000
  321165/5000000: episode: 4881, duration: 1.059s, episode steps: 33, steps per second: 31, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.242 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 25602266.000000, mean_absolute_error: 59302.777344, mean_q: 62257.832031
  321191/5000000: episode: 4882, duration: 0.716s, episode steps: 26, steps per second: 36, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.731 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 2568932096.000000, mean_absolute_error: 117068.828125, mean_q: 111186.125000
  321280/5000000: episode: 4883, duration: 2.683s, episode steps: 89, st

  322872/5000000: episode: 4907, duration: 1.147s, episode steps: 36, steps per second: 31, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.694 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 4656576512.000000, mean_absolute_error: 114297.593750, mean_q: 116577.695312
  322902/5000000: episode: 4908, duration: 0.904s, episode steps: 30, steps per second: 33, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 2094047872.000000, mean_absolute_error: 68288.898438, mean_q: 74013.164062
  322927/5000000: episode: 4909, duration: 0.679s, episode steps: 25, steps per second: 37, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 180458416.000000, mean_absolute_error: 34068.460938, mean_q: 41090.605469
  322954/5000000: episode: 4910, duration: 0.721s, episode steps: 27,

  324574/5000000: episode: 4934, duration: 0.837s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.923 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 538483328.000000, mean_absolute_error: 77783.203125, mean_q: 78161.554688
  324733/5000000: episode: 4935, duration: 4.283s, episode steps: 159, steps per second: 37, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.264 [0.000, 24.000], loss: 8164219904.000000, mean_absolute_error: 52553.160156, mean_q: 58616.113281
  324824/5000000: episode: 4936, duration: 2.619s, episode steps: 91, steps per second: 35, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.527 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 2338885376.000000, mean_absolute_error: 52032.390625, mean_q: 57152.570312
  324849/5000000: episode: 4937, duration: 0.748s, episode steps: 25, 

  326411/5000000: episode: 4961, duration: 0.806s, episode steps: 26, steps per second: 32, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.231 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 209510512.000000, mean_absolute_error: 68442.929688, mean_q: 69285.140625
  326443/5000000: episode: 4962, duration: 0.889s, episode steps: 32, steps per second: 36, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.531 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 127500672.000000, mean_absolute_error: 45531.054688, mean_q: 51931.460938
  326468/5000000: episode: 4963, duration: 0.742s, episode steps: 25, steps per second: 34, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.160 [0.000, 5.000], mean observation: 0.232 [0.000, 24.000], loss: 659530816.000000, mean_absolute_error: 44136.691406, mean_q: 46852.554688
  326495/5000000: episode: 4964, duration: 0.844s, episode steps: 27, ste

  329612/5000000: episode: 4988, duration: 0.888s, episode steps: 25, steps per second: 28, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.240 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 262549712.000000, mean_absolute_error: 54257.621094, mean_q: 49860.839844
  329785/5000000: episode: 4989, duration: 5.510s, episode steps: 173, steps per second: 31, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.029 [0.000, 5.000], mean observation: 0.273 [0.000, 24.000], loss: 4187525120.000000, mean_absolute_error: 66852.718750, mean_q: 61995.292969
  329945/5000000: episode: 4990, duration: 4.792s, episode steps: 160, steps per second: 33, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.306 [0.000, 5.000], mean observation: 0.307 [0.000, 24.000], loss: 487967424.000000, mean_absolute_error: 53365.835938, mean_q: 52651.949219
  330028/5000000: episode: 4991, duration: 2.514s, episode steps: 83, 

  338403/5000000: episode: 5015, duration: 1.028s, episode steps: 37, steps per second: 36, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.243 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 81485528.000000, mean_absolute_error: 16038.893555, mean_q: 15226.134766
  338460/5000000: episode: 5016, duration: 1.825s, episode steps: 57, steps per second: 31, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 1.842 [0.000, 5.000], mean observation: 0.266 [0.000, 24.000], loss: 869356352.000000, mean_absolute_error: 21148.376953, mean_q: 15676.714844
  338508/5000000: episode: 5017, duration: 1.355s, episode steps: 48, steps per second: 35, episode reward: -1.000, mean reward: -0.021 [-1.000, 0.000], mean action: 2.312 [0.000, 5.000], mean observation: 0.261 [0.000, 24.000], loss: 117567168.000000, mean_absolute_error: 27892.810547, mean_q: 23036.822266
  338595/5000000: episode: 5018, duration: 2.801s, episode steps: 87, step

  346283/5000000: episode: 5042, duration: 1.195s, episode steps: 39, steps per second: 33, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 1.769 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 9090275.000000, mean_absolute_error: 8587.605469, mean_q: 5982.630859
  346533/5000000: episode: 5043, duration: 6.891s, episode steps: 250, steps per second: 36, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 1.844 [0.000, 5.000], mean observation: 0.271 [0.000, 24.000], loss: 4479479296.000000, mean_absolute_error: 42167.425781, mean_q: 32268.466797
  346605/5000000: episode: 5044, duration: 1.952s, episode steps: 72, steps per second: 37, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 1.653 [0.000, 5.000], mean observation: 0.267 [0.000, 24.000], loss: 544900672.000000, mean_absolute_error: 40293.765625, mean_q: 31009.298828
  346771/5000000: episode: 5045, duration: 5.321s, episode steps: 166, step

  355389/5000000: episode: 5069, duration: 14.161s, episode steps: 564, steps per second: 40, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.906 [0.000, 5.000], mean observation: 0.268 [0.000, 24.000], loss: 262688768.000000, mean_absolute_error: 25881.912109, mean_q: 22840.218750
  355789/5000000: episode: 5070, duration: 10.721s, episode steps: 400, steps per second: 37, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.808 [0.000, 5.000], mean observation: 0.263 [0.000, 24.000], loss: 1223334144.000000, mean_absolute_error: 20388.988281, mean_q: 16716.761719
  356039/5000000: episode: 5071, duration: 7.811s, episode steps: 250, steps per second: 32, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 1.972 [0.000, 5.000], mean observation: 0.278 [0.000, 24.000], loss: 813244352.000000, mean_absolute_error: 37266.000000, mean_q: 30493.386719
  356849/5000000: episode: 5072, duration: 21.682s, episode steps: 

  365724/5000000: episode: 5096, duration: 8.475s, episode steps: 328, steps per second: 39, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.848 [0.000, 5.000], mean observation: 0.321 [0.000, 24.000], loss: 153284304.000000, mean_absolute_error: 12666.954102, mean_q: 9588.272461
  366016/5000000: episode: 5097, duration: 9.302s, episode steps: 292, steps per second: 31, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.836 [0.000, 4.000], mean observation: 0.282 [0.000, 24.000], loss: 403415488.000000, mean_absolute_error: 23369.005859, mean_q: 19393.050781
  366543/5000000: episode: 5098, duration: 15.009s, episode steps: 527, steps per second: 35, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.812 [0.000, 5.000], mean observation: 0.268 [0.000, 24.000], loss: 138112464.000000, mean_absolute_error: 9001.461914, mean_q: 7465.043457
  366802/5000000: episode: 5099, duration: 7.671s, episode steps: 259, s

  375896/5000000: episode: 5123, duration: 28.650s, episode steps: 1047, steps per second: 37, episode reward: -1.000, mean reward: -0.001 [-1.000, 0.000], mean action: 1.844 [0.000, 5.000], mean observation: 0.265 [0.000, 24.000], loss: 480331488.000000, mean_absolute_error: 19512.810547, mean_q: 16753.271484
  376429/5000000: episode: 5124, duration: 15.695s, episode steps: 533, steps per second: 34, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.760 [0.000, 5.000], mean observation: 0.298 [0.000, 24.000], loss: 300838208.000000, mean_absolute_error: 20617.640625, mean_q: 17503.705078
  377363/5000000: episode: 5125, duration: 22.609s, episode steps: 934, steps per second: 41, episode reward: -1.000, mean reward: -0.001 [-1.000, 0.000], mean action: 1.766 [0.000, 5.000], mean observation: 0.395 [0.000, 24.000], loss: 1489974400.000000, mean_absolute_error: 18296.832031, mean_q: 15444.910156
  377899/5000000: episode: 5126, duration: 15.524s, episode steps

  391515/5000000: episode: 5150, duration: 1.196s, episode steps: 36, steps per second: 30, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 1.917 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 160373344.000000, mean_absolute_error: 8462.655273, mean_q: 10098.572266
  391663/5000000: episode: 5151, duration: 4.339s, episode steps: 148, steps per second: 34, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 1.905 [0.000, 5.000], mean observation: 0.303 [0.000, 24.000], loss: 7957652.000000, mean_absolute_error: 11879.458008, mean_q: 9232.763672
  392810/5000000: episode: 5152, duration: 31.052s, episode steps: 1147, steps per second: 37, episode reward: -1.000, mean reward: -0.001 [-1.000, 0.000], mean action: 1.861 [0.000, 5.000], mean observation: 0.266 [0.000, 24.000], loss: 100350800.000000, mean_absolute_error: 8306.215820, mean_q: 6909.193359
  393040/5000000: episode: 5153, duration: 7.447s, episode steps: 230, step

  406286/5000000: episode: 5184, duration: 22.920s, episode steps: 493, steps per second: 22, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.913 [0.000, 5.000], mean observation: 0.297 [0.000, 24.000], loss: 173375424.000000, mean_absolute_error: 5536.527832, mean_q: 4558.012695
  406453/5000000: episode: 5185, duration: 5.001s, episode steps: 167, steps per second: 33, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 1.737 [0.000, 5.000], mean observation: 0.277 [0.000, 24.000], loss: 57385372.000000, mean_absolute_error: 8566.300781, mean_q: 7253.104004
  406910/5000000: episode: 5186, duration: 14.756s, episode steps: 457, steps per second: 31, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.764 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 64148548.000000, mean_absolute_error: 6909.761719, mean_q: 6164.230957
  407388/5000000: episode: 5187, duration: 15.050s, episode steps: 478, step

  422049/5000000: episode: 5211, duration: 12.956s, episode steps: 419, steps per second: 32, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.807 [0.000, 4.000], mean observation: 0.267 [0.000, 24.000], loss: 33816132.000000, mean_absolute_error: 5755.558594, mean_q: 5371.490234
  422122/5000000: episode: 5212, duration: 2.088s, episode steps: 73, steps per second: 35, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 1.904 [0.000, 5.000], mean observation: 0.265 [0.000, 24.000], loss: 70553.070312, mean_absolute_error: 3324.101562, mean_q: 2381.166992
  422559/5000000: episode: 5213, duration: 14.582s, episode steps: 437, steps per second: 30, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.824 [0.000, 5.000], mean observation: 0.272 [0.000, 24.000], loss: 105206728.000000, mean_absolute_error: 6994.807129, mean_q: 6976.394531
  422806/5000000: episode: 5214, duration: 7.075s, episode steps: 247, steps per

  434167/5000000: episode: 5238, duration: 5.522s, episode steps: 170, steps per second: 31, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 1.771 [0.000, 5.000], mean observation: 0.278 [0.000, 24.000], loss: 40763532.000000, mean_absolute_error: 6621.683594, mean_q: 6521.332520
  434192/5000000: episode: 5239, duration: 0.762s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 1.920 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 6448730.000000, mean_absolute_error: 9581.352539, mean_q: 7867.651367
  434306/5000000: episode: 5240, duration: 3.629s, episode steps: 114, steps per second: 31, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 1.895 [0.000, 5.000], mean observation: 0.277 [0.000, 24.000], loss: 20474076.000000, mean_absolute_error: 6035.242676, mean_q: 6679.758789
  434679/5000000: episode: 5241, duration: 12.131s, episode steps: 373, steps per

  446197/5000000: episode: 5265, duration: 13.653s, episode steps: 452, steps per second: 33, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.982 [0.000, 5.000], mean observation: 0.274 [0.000, 24.000], loss: 32259680.000000, mean_absolute_error: 3511.058105, mean_q: 3513.274902
  446367/5000000: episode: 5266, duration: 5.004s, episode steps: 170, steps per second: 34, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 1.888 [0.000, 5.000], mean observation: 0.278 [0.000, 24.000], loss: 70192992.000000, mean_absolute_error: 4252.526367, mean_q: 3623.026367
  447548/5000000: episode: 5267, duration: 30.796s, episode steps: 1181, steps per second: 38, episode reward: -1.000, mean reward: -0.001 [-1.000, 0.000], mean action: 1.866 [0.000, 5.000], mean observation: 0.265 [0.000, 24.000], loss: 24971154.000000, mean_absolute_error: 3637.709473, mean_q: 4001.417480
  447877/5000000: episode: 5268, duration: 9.942s, episode steps: 329, steps

  462130/5000000: episode: 5292, duration: 2.164s, episode steps: 76, steps per second: 35, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 1.776 [0.000, 5.000], mean observation: 0.274 [0.000, 24.000], loss: 5654350.500000, mean_absolute_error: 5039.900391, mean_q: 5856.324219
  462434/5000000: episode: 5293, duration: 7.939s, episode steps: 304, steps per second: 38, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.868 [0.000, 4.000], mean observation: 0.282 [0.000, 24.000], loss: 101672152.000000, mean_absolute_error: 3668.536133, mean_q: 4123.403320
  463055/5000000: episode: 5294, duration: 16.635s, episode steps: 621, steps per second: 37, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.836 [0.000, 5.000], mean observation: 0.269 [0.000, 24.000], loss: 36756280.000000, mean_absolute_error: 4658.156250, mean_q: 5396.471191
  463379/5000000: episode: 5295, duration: 10.909s, episode steps: 324, steps p

  473698/5000000: episode: 5319, duration: 9.621s, episode steps: 305, steps per second: 32, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.961 [0.000, 5.000], mean observation: 0.274 [0.000, 24.000], loss: 36557228.000000, mean_absolute_error: 3047.237305, mean_q: 2823.265869
  474174/5000000: episode: 5320, duration: 14.030s, episode steps: 476, steps per second: 34, episode reward: 1.000, mean reward: 0.002 [0.000, 1.000], mean action: 1.666 [0.000, 4.000], mean observation: 0.275 [0.000, 24.000], loss: 41534156.000000, mean_absolute_error: 3513.584717, mean_q: 3536.622803
  474400/5000000: episode: 5321, duration: 6.239s, episode steps: 226, steps per second: 36, episode reward: 1.000, mean reward: 0.004 [0.000, 1.000], mean action: 1.920 [0.000, 4.000], mean observation: 0.284 [0.000, 24.000], loss: 13237910.000000, mean_absolute_error: 3024.930420, mean_q: 3390.862793
  474942/5000000: episode: 5322, duration: 16.977s, episode steps: 542, steps per se

  488434/5000000: episode: 5346, duration: 15.410s, episode steps: 542, steps per second: 35, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.989 [0.000, 5.000], mean observation: 0.296 [0.000, 24.000], loss: 132252128.000000, mean_absolute_error: 5740.594238, mean_q: 7494.272949
  488850/5000000: episode: 5347, duration: 14.065s, episode steps: 416, steps per second: 30, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.882 [0.000, 4.000], mean observation: 0.278 [0.000, 24.000], loss: 16886708.000000, mean_absolute_error: 4907.211426, mean_q: 6089.680176
  488986/5000000: episode: 5348, duration: 4.175s, episode steps: 136, steps per second: 33, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 1.897 [0.000, 5.000], mean observation: 0.277 [0.000, 24.000], loss: 316523.625000, mean_absolute_error: 2184.315430, mean_q: 1718.156738
  489518/5000000: episode: 5349, duration: 18.289s, episode steps: 532, steps 

  500885/5000000: episode: 5373, duration: 9.924s, episode steps: 386, steps per second: 39, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.751 [0.000, 4.000], mean observation: 0.275 [0.000, 24.000], loss: 7210964992.000000, mean_absolute_error: 10067.675781, mean_q: 11678.333008
  501228/5000000: episode: 5374, duration: 11.835s, episode steps: 343, steps per second: 29, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.880 [0.000, 5.000], mean observation: 0.291 [0.000, 24.000], loss: 1033240192.000000, mean_absolute_error: 16882.501953, mean_q: 21305.193359
  501889/5000000: episode: 5375, duration: 16.134s, episode steps: 661, steps per second: 41, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.811 [0.000, 4.000], mean observation: 0.284 [0.000, 24.000], loss: 759541056.000000, mean_absolute_error: 5662.311035, mean_q: 6269.613770
  501914/5000000: episode: 5376, duration: 0.803s, episode steps: 25

  512111/5000000: episode: 5400, duration: 12.120s, episode steps: 357, steps per second: 29, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.728 [0.000, 5.000], mean observation: 0.310 [0.000, 24.000], loss: 13792921.000000, mean_absolute_error: 3618.057129, mean_q: 3772.319580
  512477/5000000: episode: 5401, duration: 10.865s, episode steps: 366, steps per second: 34, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.850 [0.000, 4.000], mean observation: 0.276 [0.000, 24.000], loss: 32086032.000000, mean_absolute_error: 4679.445801, mean_q: 5223.778809
  512916/5000000: episode: 5402, duration: 13.814s, episode steps: 439, steps per second: 32, episode reward: -1.000, mean reward: -0.002 [-1.000, 0.000], mean action: 1.936 [0.000, 4.000], mean observation: 0.275 [0.000, 24.000], loss: 854532.812500, mean_absolute_error: 3394.391113, mean_q: 3616.830811
  513471/5000000: episode: 5403, duration: 16.098s, episode steps: 555, steps 

  524056/5000000: episode: 5427, duration: 9.996s, episode steps: 342, steps per second: 34, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.924 [0.000, 5.000], mean observation: 0.271 [0.000, 24.000], loss: 25672.888672, mean_absolute_error: 743.061340, mean_q: 543.739319
  524355/5000000: episode: 5428, duration: 9.616s, episode steps: 299, steps per second: 31, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.967 [0.000, 4.000], mean observation: 0.282 [0.000, 24.000], loss: 24935.830078, mean_absolute_error: 800.036621, mean_q: 579.603394
  524465/5000000: episode: 5429, duration: 3.213s, episode steps: 110, steps per second: 34, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 1.900 [0.000, 5.000], mean observation: 0.274 [0.000, 24.000], loss: 36479.652344, mean_absolute_error: 691.179626, mean_q: 505.507507
  524808/5000000: episode: 5430, duration: 11.608s, episode steps: 343, steps per second: 30, 

  533703/5000000: episode: 5455, duration: 4.074s, episode steps: 126, steps per second: 31, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 1.810 [0.000, 5.000], mean observation: 0.271 [0.000, 24.000], loss: 1.578497, mean_absolute_error: 7.018176, mean_q: 9.419184
  534032/5000000: episode: 5456, duration: 10.571s, episode steps: 329, steps per second: 31, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.988 [0.000, 4.000], mean observation: 0.278 [0.000, 24.000], loss: 1.372503, mean_absolute_error: 7.011236, mean_q: 9.418733
  534356/5000000: episode: 5457, duration: 10.724s, episode steps: 324, steps per second: 30, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 1.951 [0.000, 5.000], mean observation: 0.275 [0.000, 24.000], loss: 1.448208, mean_absolute_error: 7.022619, mean_q: 9.418312
  534445/5000000: episode: 5458, duration: 2.885s, episode steps: 89, steps per second: 31, episode reward: -1.000, 

  540025/5000000: episode: 5483, duration: 2.223s, episode steps: 71, steps per second: 32, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 1.887 [0.000, 5.000], mean observation: 0.268 [0.000, 24.000], loss: 0.638526, mean_absolute_error: 7.311077, mean_q: 9.305607
  540082/5000000: episode: 5484, duration: 1.663s, episode steps: 57, steps per second: 34, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 1.842 [0.000, 5.000], mean observation: 0.260 [0.000, 24.000], loss: 0.723241, mean_absolute_error: 7.312288, mean_q: 9.303137
  540125/5000000: episode: 5485, duration: 1.369s, episode steps: 43, steps per second: 31, episode reward: -1.000, mean reward: -0.023 [-1.000, 0.000], mean action: 2.163 [0.000, 5.000], mean observation: 0.259 [0.000, 24.000], loss: 0.678843, mean_absolute_error: 7.325938, mean_q: 9.301240
  540224/5000000: episode: 5486, duration: 3.032s, episode steps: 99, steps per second: 33, episode reward: -1.000, mean 

  542758/5000000: episode: 5511, duration: 3.091s, episode steps: 98, steps per second: 32, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.327 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 0.527057, mean_absolute_error: 7.408383, mean_q: 9.039739
  542789/5000000: episode: 5512, duration: 1.071s, episode steps: 31, steps per second: 29, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.129 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.497568, mean_absolute_error: 7.399386, mean_q: 9.024469
  542816/5000000: episode: 5513, duration: 0.792s, episode steps: 27, steps per second: 34, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.852 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.932658, mean_absolute_error: 7.412668, mean_q: 9.019410
  542850/5000000: episode: 5514, duration: 1.039s, episode steps: 34, steps per second: 33, episode reward: -1.000, mean 

  544566/5000000: episode: 5539, duration: 4.557s, episode steps: 138, steps per second: 30, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.609 [0.000, 5.000], mean observation: 0.268 [0.000, 24.000], loss: 0.306897, mean_absolute_error: 6.963920, mean_q: 8.370446
  544592/5000000: episode: 5540, duration: 0.891s, episode steps: 26, steps per second: 29, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.385 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.210016, mean_absolute_error: 6.936999, mean_q: 8.341607
  544619/5000000: episode: 5541, duration: 0.902s, episode steps: 27, steps per second: 30, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.407 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.552592, mean_absolute_error: 6.939679, mean_q: 8.329976
  544828/5000000: episode: 5542, duration: 7.026s, episode steps: 209, steps per second: 30, episode reward: -1.000, mea

  546004/5000000: episode: 5567, duration: 1.792s, episode steps: 55, steps per second: 31, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.273 [0.000, 5.000], mean observation: 0.263 [0.000, 24.000], loss: 0.467520, mean_absolute_error: 6.387424, mean_q: 7.673484
  546058/5000000: episode: 5568, duration: 1.583s, episode steps: 54, steps per second: 34, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.685 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.428883, mean_absolute_error: 6.362472, mean_q: 7.637090
  546088/5000000: episode: 5569, duration: 0.868s, episode steps: 30, steps per second: 35, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.433 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.231305, mean_absolute_error: 6.340677, mean_q: 7.624022
  546114/5000000: episode: 5570, duration: 0.792s, episode steps: 26, steps per second: 33, episode reward: -1.000, mean 

  547334/5000000: episode: 5595, duration: 0.700s, episode steps: 26, steps per second: 37, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.462 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.345815, mean_absolute_error: 5.849770, mean_q: 7.018882
  547359/5000000: episode: 5596, duration: 0.830s, episode steps: 25, steps per second: 30, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.319693, mean_absolute_error: 5.837615, mean_q: 7.005111
  547392/5000000: episode: 5597, duration: 1.023s, episode steps: 33, steps per second: 32, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.455 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 0.361339, mean_absolute_error: 5.828239, mean_q: 6.988266
  547419/5000000: episode: 5598, duration: 0.831s, episode steps: 27, steps per second: 32, episode reward: -1.000, mean 

  548973/5000000: episode: 5623, duration: 5.454s, episode steps: 173, steps per second: 32, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.364 [0.000, 5.000], mean observation: 0.223 [0.000, 24.000], loss: 0.272560, mean_absolute_error: 5.250175, mean_q: 6.307980
  549033/5000000: episode: 5624, duration: 1.921s, episode steps: 60, steps per second: 31, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.617 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.231233, mean_absolute_error: 5.202448, mean_q: 6.254423
  549105/5000000: episode: 5625, duration: 2.267s, episode steps: 72, steps per second: 32, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.542 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.236609, mean_absolute_error: 5.181849, mean_q: 6.234056
  549133/5000000: episode: 5626, duration: 0.771s, episode steps: 28, steps per second: 36, episode reward: -1.000, mean

  550421/5000000: episode: 5651, duration: 2.070s, episode steps: 72, steps per second: 35, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.167 [0.000, 5.000], mean observation: 0.280 [0.000, 24.000], loss: 0.187967, mean_absolute_error: 4.658644, mean_q: 5.595064
  550486/5000000: episode: 5652, duration: 2.032s, episode steps: 65, steps per second: 32, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.400 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.185603, mean_absolute_error: 4.634766, mean_q: 5.562753
  550512/5000000: episode: 5653, duration: 0.765s, episode steps: 26, steps per second: 34, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.577 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.204862, mean_absolute_error: 4.619045, mean_q: 5.543955
  550740/5000000: episode: 5654, duration: 7.403s, episode steps: 228, steps per second: 31, episode reward: -1.000, mean

  551782/5000000: episode: 5679, duration: 4.711s, episode steps: 144, steps per second: 31, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.493 [0.000, 5.000], mean observation: 0.279 [0.000, 24.000], loss: 0.226304, mean_absolute_error: 4.174244, mean_q: 5.008863
  551810/5000000: episode: 5680, duration: 0.859s, episode steps: 28, steps per second: 33, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.393 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.138737, mean_absolute_error: 4.137931, mean_q: 4.974412
  551870/5000000: episode: 5681, duration: 1.805s, episode steps: 60, steps per second: 33, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.483 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.183575, mean_absolute_error: 4.125917, mean_q: 4.951912
  551908/5000000: episode: 5682, duration: 1.286s, episode steps: 38, steps per second: 30, episode reward: -1.000, mean

  553387/5000000: episode: 5707, duration: 0.811s, episode steps: 26, steps per second: 32, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.423 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.083561, mean_absolute_error: 3.573933, mean_q: 4.296189
  553470/5000000: episode: 5708, duration: 2.554s, episode steps: 83, steps per second: 32, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.771 [0.000, 5.000], mean observation: 0.316 [0.000, 24.000], loss: 0.197989, mean_absolute_error: 3.559336, mean_q: 4.270472
  553542/5000000: episode: 5709, duration: 2.349s, episode steps: 72, steps per second: 31, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 0.171339, mean_absolute_error: 3.526279, mean_q: 4.229201
  553573/5000000: episode: 5710, duration: 0.943s, episode steps: 31, steps per second: 33, episode reward: -1.000, mean 

  554815/5000000: episode: 5735, duration: 0.747s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.128941, mean_absolute_error: 3.165332, mean_q: 3.795754
  554840/5000000: episode: 5736, duration: 0.782s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.156793, mean_absolute_error: 3.157207, mean_q: 3.781966
  554894/5000000: episode: 5737, duration: 1.543s, episode steps: 54, steps per second: 35, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.944 [0.000, 5.000], mean observation: 0.260 [0.000, 24.000], loss: 0.117634, mean_absolute_error: 3.137760, mean_q: 3.763527
  554919/5000000: episode: 5738, duration: 0.788s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean 

  556479/5000000: episode: 5763, duration: 2.378s, episode steps: 76, steps per second: 32, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.316 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.088155, mean_absolute_error: 2.626050, mean_q: 3.152377
  556504/5000000: episode: 5764, duration: 0.755s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.232 [0.000, 24.000], loss: 0.085104, mean_absolute_error: 2.609276, mean_q: 3.136604
  556532/5000000: episode: 5765, duration: 0.835s, episode steps: 28, steps per second: 34, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.286 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.066319, mean_absolute_error: 2.601988, mean_q: 3.129391
  556560/5000000: episode: 5766, duration: 0.854s, episode steps: 28, steps per second: 33, episode reward: -1.000, mean 

  558426/5000000: episode: 5791, duration: 2.183s, episode steps: 75, steps per second: 34, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.120 [0.000, 5.000], mean observation: 0.234 [0.000, 24.000], loss: 0.077517, mean_absolute_error: 2.106286, mean_q: 2.527485
  558457/5000000: episode: 5792, duration: 0.955s, episode steps: 31, steps per second: 32, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.065 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.067960, mean_absolute_error: 2.088915, mean_q: 2.507944
  558510/5000000: episode: 5793, duration: 1.609s, episode steps: 53, steps per second: 33, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.472 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.078543, mean_absolute_error: 2.076486, mean_q: 2.494522
  558547/5000000: episode: 5794, duration: 1.215s, episode steps: 37, steps per second: 30, episode reward: -1.000, mean 

  560172/5000000: episode: 5819, duration: 2.500s, episode steps: 80, steps per second: 32, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.413 [0.000, 5.000], mean observation: 0.259 [0.000, 24.000], loss: 0.062781, mean_absolute_error: 1.662633, mean_q: 1.994093
  560225/5000000: episode: 5820, duration: 1.578s, episode steps: 53, steps per second: 34, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.396 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.049293, mean_absolute_error: 1.644059, mean_q: 1.974292
  560253/5000000: episode: 5821, duration: 0.885s, episode steps: 28, steps per second: 32, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.029498, mean_absolute_error: 1.632437, mean_q: 1.962651
  560359/5000000: episode: 5822, duration: 3.455s, episode steps: 106, steps per second: 31, episode reward: -1.000, mean

  561722/5000000: episode: 5847, duration: 1.432s, episode steps: 45, steps per second: 31, episode reward: -1.000, mean reward: -0.022 [-1.000, 0.000], mean action: 2.378 [0.000, 5.000], mean observation: 0.261 [0.000, 24.000], loss: 0.041587, mean_absolute_error: 1.319277, mean_q: 1.584828
  561752/5000000: episode: 5848, duration: 0.988s, episode steps: 30, steps per second: 30, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.024155, mean_absolute_error: 1.310402, mean_q: 1.580512
  561872/5000000: episode: 5849, duration: 4.074s, episode steps: 120, steps per second: 29, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 3.100 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.036528, mean_absolute_error: 1.299784, mean_q: 1.565399
  562050/5000000: episode: 5850, duration: 5.684s, episode steps: 178, steps per second: 31, episode reward: -1.000, mea

  563358/5000000: episode: 5875, duration: 0.747s, episode steps: 28, steps per second: 37, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.107 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.027443, mean_absolute_error: 1.014635, mean_q: 1.222031
  563416/5000000: episode: 5876, duration: 1.773s, episode steps: 58, steps per second: 33, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.517 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.023620, mean_absolute_error: 1.005712, mean_q: 1.210868
  563457/5000000: episode: 5877, duration: 1.379s, episode steps: 41, steps per second: 30, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.195 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 0.033037, mean_absolute_error: 0.998660, mean_q: 1.203598
  563488/5000000: episode: 5878, duration: 0.995s, episode steps: 31, steps per second: 31, episode reward: -1.000, mean 

  565045/5000000: episode: 5903, duration: 0.890s, episode steps: 29, steps per second: 33, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.241 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.022850, mean_absolute_error: 0.736720, mean_q: 0.882634
  565070/5000000: episode: 5904, duration: 0.711s, episode steps: 25, steps per second: 35, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.019797, mean_absolute_error: 0.730748, mean_q: 0.878395
  565258/5000000: episode: 5905, duration: 6.132s, episode steps: 188, steps per second: 31, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.856 [0.000, 5.000], mean observation: 0.257 [0.000, 24.000], loss: 0.018933, mean_absolute_error: 0.713490, mean_q: 0.854900
  565283/5000000: episode: 5906, duration: 0.776s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean

  566661/5000000: episode: 5931, duration: 0.977s, episode steps: 29, steps per second: 30, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.586 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.012762, mean_absolute_error: 0.519953, mean_q: 0.624975
  566688/5000000: episode: 5932, duration: 0.781s, episode steps: 27, steps per second: 35, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.185 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.018113, mean_absolute_error: 0.516000, mean_q: 0.621043
  566715/5000000: episode: 5933, duration: 0.781s, episode steps: 27, steps per second: 35, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.593 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.012166, mean_absolute_error: 0.511153, mean_q: 0.620182
  566740/5000000: episode: 5934, duration: 0.793s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean 

  568600/5000000: episode: 5959, duration: 8.478s, episode steps: 269, steps per second: 32, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.401 [0.000, 5.000], mean observation: 0.262 [0.000, 24.000], loss: 0.010943, mean_absolute_error: 0.332709, mean_q: 0.401568
  568634/5000000: episode: 5960, duration: 1.041s, episode steps: 34, steps per second: 33, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.353 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.007898, mean_absolute_error: 0.316042, mean_q: 0.382365
  568664/5000000: episode: 5961, duration: 0.897s, episode steps: 30, steps per second: 33, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.967 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.009879, mean_absolute_error: 0.314784, mean_q: 0.379479
  568748/5000000: episode: 5962, duration: 2.677s, episode steps: 84, steps per second: 31, episode reward: -1.000, mean

  569906/5000000: episode: 5987, duration: 0.775s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.003941, mean_absolute_error: 0.207787, mean_q: 0.253636
  570003/5000000: episode: 5988, duration: 3.200s, episode steps: 97, steps per second: 30, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.804 [0.000, 5.000], mean observation: 0.264 [0.000, 24.000], loss: 0.008237, mean_absolute_error: 0.205101, mean_q: 0.247596
  570032/5000000: episode: 5989, duration: 0.925s, episode steps: 29, steps per second: 31, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.414 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.007400, mean_absolute_error: 0.194505, mean_q: 0.236877
  570070/5000000: episode: 5990, duration: 1.175s, episode steps: 38, steps per second: 32, episode reward: -1.000, mean 

  571518/5000000: episode: 6015, duration: 0.858s, episode steps: 28, steps per second: 33, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.714 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.005447, mean_absolute_error: 0.085356, mean_q: 0.105836
  571553/5000000: episode: 6016, duration: 1.183s, episode steps: 35, steps per second: 30, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.657 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 0.004893, mean_absolute_error: 0.083516, mean_q: 0.103013
  571580/5000000: episode: 6017, duration: 0.836s, episode steps: 27, steps per second: 32, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 1.778 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.004196, mean_absolute_error: 0.082221, mean_q: 0.098734
  571612/5000000: episode: 6018, duration: 1.010s, episode steps: 32, steps per second: 32, episode reward: -1.000, mean 

  572752/5000000: episode: 6043, duration: 2.167s, episode steps: 72, steps per second: 33, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.347 [0.000, 5.000], mean observation: 0.271 [0.000, 24.000], loss: 0.005079, mean_absolute_error: 0.007623, mean_q: 0.008768
  572783/5000000: episode: 6044, duration: 0.953s, episode steps: 31, steps per second: 33, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.387 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.005109, mean_absolute_error: 0.010840, mean_q: 0.000220
  572809/5000000: episode: 6045, duration: 0.757s, episode steps: 26, steps per second: 34, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.231 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.004258, mean_absolute_error: 0.006325, mean_q: 0.007176
  572924/5000000: episode: 6046, duration: 3.728s, episode steps: 115, steps per second: 31, episode reward: -1.000, mean

  574632/5000000: episode: 6071, duration: 3.731s, episode steps: 115, steps per second: 31, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.696 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.004013, mean_absolute_error: 0.092810, mean_q: -0.100966
  574720/5000000: episode: 6072, duration: 2.890s, episode steps: 88, steps per second: 30, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.693 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.004652, mean_absolute_error: 0.096677, mean_q: -0.108160
  574748/5000000: episode: 6073, duration: 0.793s, episode steps: 28, steps per second: 35, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 1.750 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.005277, mean_absolute_error: 0.105328, mean_q: -0.115514
  574777/5000000: episode: 6074, duration: 0.908s, episode steps: 29, steps per second: 32, episode reward: -1.000, m

  576547/5000000: episode: 6099, duration: 0.972s, episode steps: 32, steps per second: 33, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.406 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 0.003105, mean_absolute_error: 0.181233, mean_q: -0.207306
  576580/5000000: episode: 6100, duration: 0.988s, episode steps: 33, steps per second: 33, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.485 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.002407, mean_absolute_error: 0.176774, mean_q: -0.204549
  576606/5000000: episode: 6101, duration: 0.853s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.231 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.004179, mean_absolute_error: 0.176220, mean_q: -0.200889
  576675/5000000: episode: 6102, duration: 2.017s, episode steps: 69, steps per second: 34, episode reward: -1.000, me

  578461/5000000: episode: 6127, duration: 2.667s, episode steps: 83, steps per second: 31, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.578 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.002987, mean_absolute_error: 0.223556, mean_q: -0.259906
  578492/5000000: episode: 6128, duration: 0.914s, episode steps: 31, steps per second: 34, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.516 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.003839, mean_absolute_error: 0.226519, mean_q: -0.263095
  578517/5000000: episode: 6129, duration: 0.755s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.680 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.002036, mean_absolute_error: 0.225407, mean_q: -0.263888
  578561/5000000: episode: 6130, duration: 1.270s, episode steps: 44, steps per second: 35, episode reward: -1.000, me

  579686/5000000: episode: 6155, duration: 2.251s, episode steps: 86, steps per second: 38, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.547 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.002947, mean_absolute_error: 0.247447, mean_q: -0.287211
  579835/5000000: episode: 6156, duration: 4.620s, episode steps: 149, steps per second: 32, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.430 [0.000, 5.000], mean observation: 0.323 [0.000, 24.000], loss: 0.002589, mean_absolute_error: 0.247818, mean_q: -0.290498
  579925/5000000: episode: 6157, duration: 2.753s, episode steps: 90, steps per second: 33, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.544 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.002344, mean_absolute_error: 0.250537, mean_q: -0.293420
  579950/5000000: episode: 6158, duration: 0.712s, episode steps: 25, steps per second: 35, episode reward: -1.000, m

  581680/5000000: episode: 6183, duration: 1.124s, episode steps: 37, steps per second: 33, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.676 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 0.003762, mean_absolute_error: 0.287624, mean_q: -0.333375
  581744/5000000: episode: 6184, duration: 1.937s, episode steps: 64, steps per second: 33, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.688 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.002811, mean_absolute_error: 0.287117, mean_q: -0.334545
  581833/5000000: episode: 6185, duration: 2.856s, episode steps: 89, steps per second: 31, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.382 [0.000, 5.000], mean observation: 0.273 [0.000, 24.000], loss: 0.002256, mean_absolute_error: 0.285650, mean_q: -0.334999
  581858/5000000: episode: 6186, duration: 0.797s, episode steps: 25, steps per second: 31, episode reward: -1.000, me

  583176/5000000: episode: 6211, duration: 0.824s, episode steps: 27, steps per second: 33, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.852 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.003743, mean_absolute_error: 0.310752, mean_q: -0.363062
  583206/5000000: episode: 6212, duration: 0.941s, episode steps: 30, steps per second: 32, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001497, mean_absolute_error: 0.309071, mean_q: -0.362920
  583237/5000000: episode: 6213, duration: 1.030s, episode steps: 31, steps per second: 30, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.710 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001854, mean_absolute_error: 0.304311, mean_q: -0.359840
  583264/5000000: episode: 6214, duration: 0.841s, episode steps: 27, steps per second: 32, episode reward: -1.000, me

  584793/5000000: episode: 6239, duration: 0.854s, episode steps: 27, steps per second: 32, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 3.667 [1.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.002624, mean_absolute_error: 0.325013, mean_q: -0.385864
  584981/5000000: episode: 6240, duration: 5.091s, episode steps: 188, steps per second: 37, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.495 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.002030, mean_absolute_error: 0.328866, mean_q: -0.388588
  585008/5000000: episode: 6241, duration: 1.061s, episode steps: 27, steps per second: 25, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.630 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.002782, mean_absolute_error: 0.333212, mean_q: -0.391692
  585045/5000000: episode: 6242, duration: 1.200s, episode steps: 37, steps per second: 31, episode reward: -1.000, m

  586635/5000000: episode: 6267, duration: 3.045s, episode steps: 101, steps per second: 33, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.515 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001617, mean_absolute_error: 0.338685, mean_q: -0.399167
  586687/5000000: episode: 6268, duration: 1.665s, episode steps: 52, steps per second: 31, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.233 [0.000, 24.000], loss: 0.001303, mean_absolute_error: 0.336688, mean_q: -0.397530
  586715/5000000: episode: 6269, duration: 0.858s, episode steps: 28, steps per second: 33, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.750 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001620, mean_absolute_error: 0.336849, mean_q: -0.398123
  586795/5000000: episode: 6270, duration: 2.534s, episode steps: 80, steps per second: 32, episode reward: -1.000, m

  587957/5000000: episode: 6295, duration: 0.763s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.560 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001329, mean_absolute_error: 0.341825, mean_q: -0.404945
  588274/5000000: episode: 6296, duration: 10.486s, episode steps: 317, steps per second: 30, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.413 [0.000, 5.000], mean observation: 0.257 [0.000, 24.000], loss: 0.002012, mean_absolute_error: 0.344894, mean_q: -0.407101
  588350/5000000: episode: 6297, duration: 2.348s, episode steps: 76, steps per second: 32, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.803 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001860, mean_absolute_error: 0.349066, mean_q: -0.412107
  588521/5000000: episode: 6298, duration: 5.379s, episode steps: 171, steps per second: 32, episode reward: -1.000,

  590103/5000000: episode: 6323, duration: 0.851s, episode steps: 30, steps per second: 35, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.300 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.002247, mean_absolute_error: 0.357722, mean_q: -0.422705
  590141/5000000: episode: 6324, duration: 1.283s, episode steps: 38, steps per second: 30, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.053 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 0.001793, mean_absolute_error: 0.359718, mean_q: -0.423436
  590219/5000000: episode: 6325, duration: 2.594s, episode steps: 78, steps per second: 30, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.346 [0.000, 5.000], mean observation: 0.278 [0.000, 24.000], loss: 0.001926, mean_absolute_error: 0.359398, mean_q: -0.423720
  590287/5000000: episode: 6326, duration: 2.061s, episode steps: 68, steps per second: 33, episode reward: -1.000, me

  591385/5000000: episode: 6351, duration: 0.869s, episode steps: 28, steps per second: 32, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.607 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.000713, mean_absolute_error: 0.369136, mean_q: -0.438368
  591417/5000000: episode: 6352, duration: 1.092s, episode steps: 32, steps per second: 29, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.219 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 0.000923, mean_absolute_error: 0.368196, mean_q: -0.438001
  591485/5000000: episode: 6353, duration: 1.998s, episode steps: 68, steps per second: 34, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.279 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 0.002155, mean_absolute_error: 0.373676, mean_q: -0.441357
  591523/5000000: episode: 6354, duration: 1.292s, episode steps: 38, steps per second: 29, episode reward: -1.000, me

  592531/5000000: episode: 6379, duration: 2.004s, episode steps: 65, steps per second: 32, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.585 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.002054, mean_absolute_error: 0.372420, mean_q: -0.439471
  592567/5000000: episode: 6380, duration: 1.129s, episode steps: 36, steps per second: 32, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.278 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 0.001254, mean_absolute_error: 0.367936, mean_q: -0.432771
  592620/5000000: episode: 6381, duration: 1.676s, episode steps: 53, steps per second: 32, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.547 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001230, mean_absolute_error: 0.365902, mean_q: -0.433642
  592652/5000000: episode: 6382, duration: 1.047s, episode steps: 32, steps per second: 31, episode reward: -1.000, me

  594228/5000000: episode: 6407, duration: 2.799s, episode steps: 88, steps per second: 31, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.693 [0.000, 5.000], mean observation: 0.268 [0.000, 24.000], loss: 0.002495, mean_absolute_error: 0.369920, mean_q: -0.436068
  594294/5000000: episode: 6408, duration: 2.132s, episode steps: 66, steps per second: 31, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.545 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 0.001890, mean_absolute_error: 0.369372, mean_q: -0.435769
  594378/5000000: episode: 6409, duration: 2.476s, episode steps: 84, steps per second: 34, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.440 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 0.001595, mean_absolute_error: 0.370441, mean_q: -0.437644
  594476/5000000: episode: 6410, duration: 2.971s, episode steps: 98, steps per second: 33, episode reward: -1.000, me

  595838/5000000: episode: 6435, duration: 0.975s, episode steps: 31, steps per second: 32, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.129 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.000481, mean_absolute_error: 0.368326, mean_q: -0.438026
  595863/5000000: episode: 6436, duration: 0.678s, episode steps: 25, steps per second: 37, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.840 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.001769, mean_absolute_error: 0.371108, mean_q: -0.441106
  595973/5000000: episode: 6437, duration: 3.523s, episode steps: 110, steps per second: 31, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.636 [0.000, 5.000], mean observation: 0.274 [0.000, 24.000], loss: 0.001877, mean_absolute_error: 0.371702, mean_q: -0.439244
  596030/5000000: episode: 6438, duration: 1.742s, episode steps: 57, steps per second: 33, episode reward: -1.000, m

  597667/5000000: episode: 6463, duration: 1.630s, episode steps: 53, steps per second: 33, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.491 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001466, mean_absolute_error: 0.373979, mean_q: -0.441879
  597692/5000000: episode: 6464, duration: 0.871s, episode steps: 25, steps per second: 29, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001173, mean_absolute_error: 0.373560, mean_q: -0.442283
  597717/5000000: episode: 6465, duration: 0.778s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.840 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.002129, mean_absolute_error: 0.373887, mean_q: -0.442450
  597744/5000000: episode: 6466, duration: 0.846s, episode steps: 27, steps per second: 32, episode reward: -1.000, me

  599035/5000000: episode: 6491, duration: 0.833s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.308 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001475, mean_absolute_error: 0.377269, mean_q: -0.445027
  599089/5000000: episode: 6492, duration: 1.592s, episode steps: 54, steps per second: 34, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.167 [0.000, 5.000], mean observation: 0.218 [0.000, 24.000], loss: 0.001498, mean_absolute_error: 0.379394, mean_q: -0.449556
  599120/5000000: episode: 6493, duration: 0.966s, episode steps: 31, steps per second: 32, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.806 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001834, mean_absolute_error: 0.379948, mean_q: -0.448193
  599151/5000000: episode: 6494, duration: 0.993s, episode steps: 31, steps per second: 31, episode reward: -1.000, me

  600438/5000000: episode: 6519, duration: 1.729s, episode steps: 54, steps per second: 31, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001920, mean_absolute_error: 0.382220, mean_q: -0.451894
  600499/5000000: episode: 6520, duration: 1.905s, episode steps: 61, steps per second: 32, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.262 [0.000, 5.000], mean observation: 0.256 [0.000, 24.000], loss: 0.001698, mean_absolute_error: 0.382956, mean_q: -0.451127
  600589/5000000: episode: 6521, duration: 2.936s, episode steps: 90, steps per second: 31, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.378 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001628, mean_absolute_error: 0.380550, mean_q: -0.450507
  600718/5000000: episode: 6522, duration: 3.916s, episode steps: 129, steps per second: 33, episode reward: -1.000, m

  601786/5000000: episode: 6547, duration: 0.857s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001901, mean_absolute_error: 0.389178, mean_q: -0.460792
  601848/5000000: episode: 6548, duration: 1.829s, episode steps: 62, steps per second: 34, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.597 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.000966, mean_absolute_error: 0.389577, mean_q: -0.460544
  601907/5000000: episode: 6549, duration: 1.848s, episode steps: 59, steps per second: 32, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.237 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001785, mean_absolute_error: 0.388529, mean_q: -0.457619
  601970/5000000: episode: 6550, duration: 1.885s, episode steps: 63, steps per second: 33, episode reward: -1.000, me

  603323/5000000: episode: 6575, duration: 1.420s, episode steps: 43, steps per second: 30, episode reward: -1.000, mean reward: -0.023 [-1.000, 0.000], mean action: 2.698 [0.000, 5.000], mean observation: 0.256 [0.000, 24.000], loss: 0.001056, mean_absolute_error: 0.391472, mean_q: -0.461576
  603443/5000000: episode: 6576, duration: 3.763s, episode steps: 120, steps per second: 32, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.533 [0.000, 5.000], mean observation: 0.272 [0.000, 24.000], loss: 0.001527, mean_absolute_error: 0.393211, mean_q: -0.464273
  603500/5000000: episode: 6577, duration: 1.742s, episode steps: 57, steps per second: 33, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.965 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001855, mean_absolute_error: 0.390199, mean_q: -0.464162
  603531/5000000: episode: 6578, duration: 0.798s, episode steps: 31, steps per second: 39, episode reward: -1.000, m

  604739/5000000: episode: 6603, duration: 1.812s, episode steps: 58, steps per second: 32, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001325, mean_absolute_error: 0.390813, mean_q: -0.460149
  604766/5000000: episode: 6604, duration: 0.918s, episode steps: 27, steps per second: 29, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 3.185 [1.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001532, mean_absolute_error: 0.388561, mean_q: -0.459273
  604820/5000000: episode: 6605, duration: 1.504s, episode steps: 54, steps per second: 36, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.796 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001818, mean_absolute_error: 0.392618, mean_q: -0.464523
  604870/5000000: episode: 6606, duration: 1.692s, episode steps: 50, steps per second: 30, episode reward: -1.000, me

  606405/5000000: episode: 6631, duration: 4.573s, episode steps: 136, steps per second: 30, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.463 [0.000, 5.000], mean observation: 0.273 [0.000, 24.000], loss: 0.001879, mean_absolute_error: 0.391283, mean_q: -0.460270
  606436/5000000: episode: 6632, duration: 1.041s, episode steps: 31, steps per second: 30, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.677 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.002341, mean_absolute_error: 0.390897, mean_q: -0.460058
  606607/5000000: episode: 6633, duration: 5.156s, episode steps: 171, steps per second: 33, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.673 [0.000, 5.000], mean observation: 0.190 [0.000, 24.000], loss: 0.001176, mean_absolute_error: 0.387818, mean_q: -0.459668
  606641/5000000: episode: 6634, duration: 0.970s, episode steps: 34, steps per second: 35, episode reward: -1.000, 

  608344/5000000: episode: 6659, duration: 3.726s, episode steps: 115, steps per second: 31, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.348 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001379, mean_absolute_error: 0.391129, mean_q: -0.460537
  608396/5000000: episode: 6660, duration: 1.677s, episode steps: 52, steps per second: 31, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 3.038 [0.000, 5.000], mean observation: 0.264 [0.000, 24.000], loss: 0.001493, mean_absolute_error: 0.387476, mean_q: -0.457496
  608426/5000000: episode: 6661, duration: 0.940s, episode steps: 30, steps per second: 32, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.002122, mean_absolute_error: 0.390499, mean_q: -0.460610
  608451/5000000: episode: 6662, duration: 0.713s, episode steps: 25, steps per second: 35, episode reward: -1.000, m

  609596/5000000: episode: 6687, duration: 1.196s, episode steps: 38, steps per second: 32, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 1.895 [0.000, 5.000], mean observation: 0.253 [0.000, 24.000], loss: 0.001194, mean_absolute_error: 0.389254, mean_q: -0.461195
  609652/5000000: episode: 6688, duration: 1.646s, episode steps: 56, steps per second: 34, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.286 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001615, mean_absolute_error: 0.389611, mean_q: -0.461687
  609687/5000000: episode: 6689, duration: 1.025s, episode steps: 35, steps per second: 34, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.486 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 0.000916, mean_absolute_error: 0.387991, mean_q: -0.459299
  609744/5000000: episode: 6690, duration: 1.857s, episode steps: 57, steps per second: 31, episode reward: -1.000, me

  611104/5000000: episode: 6715, duration: 0.844s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.769 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001056, mean_absolute_error: 0.386943, mean_q: -0.461162
  611159/5000000: episode: 6716, duration: 1.662s, episode steps: 55, steps per second: 33, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.164 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 0.002350, mean_absolute_error: 0.389385, mean_q: -0.461572
  611189/5000000: episode: 6717, duration: 0.990s, episode steps: 30, steps per second: 30, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001669, mean_absolute_error: 0.389896, mean_q: -0.458722
  611220/5000000: episode: 6718, duration: 0.988s, episode steps: 31, steps per second: 31, episode reward: -1.000, me

  612890/5000000: episode: 6743, duration: 0.772s, episode steps: 26, steps per second: 34, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.654 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001423, mean_absolute_error: 0.387376, mean_q: -0.457312
  612918/5000000: episode: 6744, duration: 0.821s, episode steps: 28, steps per second: 34, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.393 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.000996, mean_absolute_error: 0.384034, mean_q: -0.453581
  612946/5000000: episode: 6745, duration: 0.839s, episode steps: 28, steps per second: 33, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.786 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001161, mean_absolute_error: 0.382991, mean_q: -0.452327
  612976/5000000: episode: 6746, duration: 0.896s, episode steps: 30, steps per second: 33, episode reward: -1.000, me

  614359/5000000: episode: 6771, duration: 0.908s, episode steps: 31, steps per second: 34, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.516 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001469, mean_absolute_error: 0.390471, mean_q: -0.461362
  614415/5000000: episode: 6772, duration: 1.891s, episode steps: 56, steps per second: 30, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.696 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001460, mean_absolute_error: 0.387957, mean_q: -0.458741
  614446/5000000: episode: 6773, duration: 0.952s, episode steps: 31, steps per second: 33, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.677 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001032, mean_absolute_error: 0.389913, mean_q: -0.461237
  614472/5000000: episode: 6774, duration: 0.919s, episode steps: 26, steps per second: 28, episode reward: -1.000, me

  615565/5000000: episode: 6799, duration: 1.008s, episode steps: 34, steps per second: 34, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 1.735 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 0.002027, mean_absolute_error: 0.384608, mean_q: -0.456770
  615590/5000000: episode: 6800, duration: 0.754s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.880 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001114, mean_absolute_error: 0.388295, mean_q: -0.460597
  615616/5000000: episode: 6801, duration: 0.845s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.077 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.002297, mean_absolute_error: 0.390573, mean_q: -0.460761
  615641/5000000: episode: 6802, duration: 0.850s, episode steps: 25, steps per second: 29, episode reward: -1.000, me

  617015/5000000: episode: 6827, duration: 2.723s, episode steps: 81, steps per second: 30, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.617 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001186, mean_absolute_error: 0.387355, mean_q: -0.459940
  617041/5000000: episode: 6828, duration: 0.885s, episode steps: 26, steps per second: 29, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.769 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.002094, mean_absolute_error: 0.390691, mean_q: -0.460936
  617072/5000000: episode: 6829, duration: 1.052s, episode steps: 31, steps per second: 29, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.516 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.002186, mean_absolute_error: 0.391805, mean_q: -0.461388
  617163/5000000: episode: 6830, duration: 2.930s, episode steps: 91, steps per second: 31, episode reward: -1.000, me

  618447/5000000: episode: 6855, duration: 0.771s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.001091, mean_absolute_error: 0.386760, mean_q: -0.458012
  618474/5000000: episode: 6856, duration: 0.820s, episode steps: 27, steps per second: 33, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.630 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001525, mean_absolute_error: 0.388116, mean_q: -0.459036
  618512/5000000: episode: 6857, duration: 1.183s, episode steps: 38, steps per second: 32, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.184 [0.000, 5.000], mean observation: 0.253 [0.000, 24.000], loss: 0.002258, mean_absolute_error: 0.392052, mean_q: -0.464374
  618635/5000000: episode: 6858, duration: 3.438s, episode steps: 123, steps per second: 36, episode reward: -1.000, m

  619681/5000000: episode: 6883, duration: 1.264s, episode steps: 40, steps per second: 32, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.257 [0.000, 24.000], loss: 0.002628, mean_absolute_error: 0.394657, mean_q: -0.465210
  619709/5000000: episode: 6884, duration: 0.864s, episode steps: 28, steps per second: 32, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.214 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001292, mean_absolute_error: 0.394119, mean_q: -0.463831
  619800/5000000: episode: 6885, duration: 2.623s, episode steps: 91, steps per second: 35, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.264 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.000792, mean_absolute_error: 0.390209, mean_q: -0.464214
  620082/5000000: episode: 6886, duration: 9.072s, episode steps: 282, steps per second: 31, episode reward: -1.000, m

  621610/5000000: episode: 6911, duration: 1.056s, episode steps: 32, steps per second: 30, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.531 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001133, mean_absolute_error: 0.393667, mean_q: -0.462554
  621670/5000000: episode: 6912, duration: 1.958s, episode steps: 60, steps per second: 31, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.767 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001627, mean_absolute_error: 0.386218, mean_q: -0.455354
  621720/5000000: episode: 6913, duration: 1.638s, episode steps: 50, steps per second: 31, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.001922, mean_absolute_error: 0.390100, mean_q: -0.458305
  621869/5000000: episode: 6914, duration: 4.706s, episode steps: 149, steps per second: 32, episode reward: -1.000, m

  623519/5000000: episode: 6939, duration: 8.856s, episode steps: 268, steps per second: 30, episode reward: -1.000, mean reward: -0.004 [-1.000, 0.000], mean action: 2.608 [0.000, 5.000], mean observation: 0.282 [0.000, 24.000], loss: 0.001531, mean_absolute_error: 0.389427, mean_q: -0.460686
  623718/5000000: episode: 6940, duration: 6.341s, episode steps: 199, steps per second: 31, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.603 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001634, mean_absolute_error: 0.388833, mean_q: -0.459508
  623805/5000000: episode: 6941, duration: 2.688s, episode steps: 87, steps per second: 32, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.552 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 0.001731, mean_absolute_error: 0.388079, mean_q: -0.459263
  623887/5000000: episode: 6942, duration: 2.263s, episode steps: 82, steps per second: 36, episode reward: -1.000, 

  625459/5000000: episode: 6967, duration: 3.205s, episode steps: 104, steps per second: 32, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.548 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 0.002106, mean_absolute_error: 0.393848, mean_q: -0.464758
  625541/5000000: episode: 6968, duration: 2.673s, episode steps: 82, steps per second: 31, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.854 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001304, mean_absolute_error: 0.393355, mean_q: -0.467092
  625570/5000000: episode: 6969, duration: 0.925s, episode steps: 29, steps per second: 31, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.207 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001682, mean_absolute_error: 0.397158, mean_q: -0.471526
  625599/5000000: episode: 6970, duration: 0.948s, episode steps: 29, steps per second: 31, episode reward: -1.000, m

  626955/5000000: episode: 6995, duration: 1.700s, episode steps: 55, steps per second: 32, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.091 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001543, mean_absolute_error: 0.392920, mean_q: -0.464758
  626982/5000000: episode: 6996, duration: 0.863s, episode steps: 27, steps per second: 31, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001964, mean_absolute_error: 0.394971, mean_q: -0.464061
  627007/5000000: episode: 6997, duration: 0.869s, episode steps: 25, steps per second: 29, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.000918, mean_absolute_error: 0.393038, mean_q: -0.462400
  627126/5000000: episode: 6998, duration: 3.628s, episode steps: 119, steps per second: 33, episode reward: -1.000, m

  628799/5000000: episode: 7023, duration: 0.791s, episode steps: 27, steps per second: 34, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.111 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001199, mean_absolute_error: 0.386872, mean_q: -0.458248
  628825/5000000: episode: 7024, duration: 0.842s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.615 [0.000, 5.000], mean observation: 0.232 [0.000, 24.000], loss: 0.000718, mean_absolute_error: 0.382040, mean_q: -0.453108
  628861/5000000: episode: 7025, duration: 1.144s, episode steps: 36, steps per second: 31, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.083 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 0.002186, mean_absolute_error: 0.386104, mean_q: -0.455675
  628886/5000000: episode: 7026, duration: 0.748s, episode steps: 25, steps per second: 33, episode reward: -1.000, me

  630390/5000000: episode: 7051, duration: 9.752s, episode steps: 292, steps per second: 30, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.558 [0.000, 5.000], mean observation: 0.184 [0.000, 24.000], loss: 0.001530, mean_absolute_error: 0.384279, mean_q: -0.454138
  630480/5000000: episode: 7052, duration: 2.880s, episode steps: 90, steps per second: 31, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.544 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 0.001707, mean_absolute_error: 0.384458, mean_q: -0.454094
  630505/5000000: episode: 7053, duration: 0.736s, episode steps: 25, steps per second: 34, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.880 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.002948, mean_absolute_error: 0.385861, mean_q: -0.454543
  630591/5000000: episode: 7054, duration: 2.737s, episode steps: 86, steps per second: 31, episode reward: -1.000, m

  631790/5000000: episode: 7079, duration: 2.852s, episode steps: 85, steps per second: 30, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.471 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001375, mean_absolute_error: 0.391919, mean_q: -0.465184
  631845/5000000: episode: 7080, duration: 1.618s, episode steps: 55, steps per second: 34, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.473 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001955, mean_absolute_error: 0.391951, mean_q: -0.462513
  631902/5000000: episode: 7081, duration: 1.745s, episode steps: 57, steps per second: 33, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001351, mean_absolute_error: 0.390887, mean_q: -0.463245
  632018/5000000: episode: 7082, duration: 3.610s, episode steps: 116, steps per second: 32, episode reward: -1.000, m

  633651/5000000: episode: 7107, duration: 9.849s, episode steps: 368, steps per second: 37, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.497 [0.000, 5.000], mean observation: 0.348 [0.000, 24.000], loss: 0.001284, mean_absolute_error: 0.388709, mean_q: -0.460605
  633707/5000000: episode: 7108, duration: 1.697s, episode steps: 56, steps per second: 33, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.002097, mean_absolute_error: 0.390801, mean_q: -0.462572
  633884/5000000: episode: 7109, duration: 5.399s, episode steps: 177, steps per second: 33, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.328 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001512, mean_absolute_error: 0.390139, mean_q: -0.460585
  633978/5000000: episode: 7110, duration: 3.013s, episode steps: 94, steps per second: 31, episode reward: -1.000, 

  635786/5000000: episode: 7135, duration: 5.336s, episode steps: 182, steps per second: 34, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.445 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001674, mean_absolute_error: 0.389180, mean_q: -0.460176
  635850/5000000: episode: 7136, duration: 2.146s, episode steps: 64, steps per second: 30, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.547 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.001208, mean_absolute_error: 0.390550, mean_q: -0.463019
  635884/5000000: episode: 7137, duration: 1.049s, episode steps: 34, steps per second: 32, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.206 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.002134, mean_absolute_error: 0.393391, mean_q: -0.464103
  635939/5000000: episode: 7138, duration: 1.682s, episode steps: 55, steps per second: 33, episode reward: -1.000, m

  637502/5000000: episode: 7163, duration: 1.204s, episode steps: 39, steps per second: 32, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.513 [0.000, 5.000], mean observation: 0.253 [0.000, 24.000], loss: 0.001036, mean_absolute_error: 0.392038, mean_q: -0.465817
  637532/5000000: episode: 7164, duration: 0.910s, episode steps: 30, steps per second: 33, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 3.233 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001488, mean_absolute_error: 0.393591, mean_q: -0.467345
  637586/5000000: episode: 7165, duration: 1.677s, episode steps: 54, steps per second: 32, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.389 [0.000, 5.000], mean observation: 0.258 [0.000, 24.000], loss: 0.001238, mean_absolute_error: 0.391903, mean_q: -0.464145
  637690/5000000: episode: 7166, duration: 3.181s, episode steps: 104, steps per second: 33, episode reward: -1.000, m

  638714/5000000: episode: 7191, duration: 1.691s, episode steps: 54, steps per second: 32, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.722 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001337, mean_absolute_error: 0.391087, mean_q: -0.463900
  638749/5000000: episode: 7192, duration: 1.053s, episode steps: 35, steps per second: 33, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.514 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.001677, mean_absolute_error: 0.392390, mean_q: -0.465047
  638774/5000000: episode: 7193, duration: 0.755s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.840 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001262, mean_absolute_error: 0.390332, mean_q: -0.463181
  638802/5000000: episode: 7194, duration: 0.857s, episode steps: 28, steps per second: 33, episode reward: -1.000, me

  640681/5000000: episode: 7219, duration: 9.492s, episode steps: 346, steps per second: 36, episode reward: -1.000, mean reward: -0.003 [-1.000, 0.000], mean action: 2.639 [0.000, 5.000], mean observation: 0.309 [0.000, 24.000], loss: 0.001211, mean_absolute_error: 0.391054, mean_q: -0.463560
  640806/5000000: episode: 7220, duration: 4.014s, episode steps: 125, steps per second: 31, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.424 [0.000, 5.000], mean observation: 0.269 [0.000, 24.000], loss: 0.001633, mean_absolute_error: 0.390799, mean_q: -0.461240
  640994/5000000: episode: 7221, duration: 6.128s, episode steps: 188, steps per second: 31, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.537 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001660, mean_absolute_error: 0.390466, mean_q: -0.461043
  641019/5000000: episode: 7222, duration: 0.910s, episode steps: 25, steps per second: 27, episode reward: -1.000,

  642800/5000000: episode: 7247, duration: 1.882s, episode steps: 62, steps per second: 33, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.565 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001594, mean_absolute_error: 0.392871, mean_q: -0.465065
  642825/5000000: episode: 7248, duration: 0.753s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.001424, mean_absolute_error: 0.393502, mean_q: -0.466093
  642850/5000000: episode: 7249, duration: 0.794s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001077, mean_absolute_error: 0.394207, mean_q: -0.466454
  642941/5000000: episode: 7250, duration: 2.787s, episode steps: 91, steps per second: 33, episode reward: -1.000, me

  644659/5000000: episode: 7275, duration: 1.214s, episode steps: 43, steps per second: 35, episode reward: -1.000, mean reward: -0.023 [-1.000, 0.000], mean action: 2.372 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 0.001163, mean_absolute_error: 0.389798, mean_q: -0.459990
  644686/5000000: episode: 7276, duration: 0.804s, episode steps: 27, steps per second: 34, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.630 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.000842, mean_absolute_error: 0.387325, mean_q: -0.460371
  644744/5000000: episode: 7277, duration: 1.966s, episode steps: 58, steps per second: 29, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.483 [0.000, 5.000], mean observation: 0.292 [0.000, 24.000], loss: 0.001267, mean_absolute_error: 0.386874, mean_q: -0.459348
  644800/5000000: episode: 7278, duration: 1.599s, episode steps: 56, steps per second: 35, episode reward: -1.000, me

  646249/5000000: episode: 7303, duration: 0.873s, episode steps: 25, steps per second: 29, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001607, mean_absolute_error: 0.392073, mean_q: -0.464192
  646274/5000000: episode: 7304, duration: 0.819s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.280 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.002315, mean_absolute_error: 0.396576, mean_q: -0.468911
  646302/5000000: episode: 7305, duration: 0.824s, episode steps: 28, steps per second: 34, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.000799, mean_absolute_error: 0.392998, mean_q: -0.466524
  646332/5000000: episode: 7306, duration: 0.971s, episode steps: 30, steps per second: 31, episode reward: -1.000, me

  647390/5000000: episode: 7331, duration: 1.788s, episode steps: 58, steps per second: 32, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.776 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001552, mean_absolute_error: 0.392414, mean_q: -0.464671
  647419/5000000: episode: 7332, duration: 0.879s, episode steps: 29, steps per second: 33, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.310 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.000616, mean_absolute_error: 0.390734, mean_q: -0.464414
  647444/5000000: episode: 7333, duration: 0.760s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001427, mean_absolute_error: 0.392207, mean_q: -0.466213
  647666/5000000: episode: 7334, duration: 6.533s, episode steps: 222, steps per second: 34, episode reward: -1.000, m

  649025/5000000: episode: 7359, duration: 1.363s, episode steps: 39, steps per second: 29, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.154 [0.000, 5.000], mean observation: 0.257 [0.000, 24.000], loss: 0.001058, mean_absolute_error: 0.388133, mean_q: -0.460965
  649080/5000000: episode: 7360, duration: 1.685s, episode steps: 55, steps per second: 33, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.727 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001823, mean_absolute_error: 0.388810, mean_q: -0.459933
  649107/5000000: episode: 7361, duration: 0.839s, episode steps: 27, steps per second: 32, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.556 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001519, mean_absolute_error: 0.390317, mean_q: -0.460796
  649161/5000000: episode: 7362, duration: 1.767s, episode steps: 54, steps per second: 31, episode reward: -1.000, me

  650612/5000000: episode: 7387, duration: 4.368s, episode steps: 137, steps per second: 31, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.547 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001740, mean_absolute_error: 0.392484, mean_q: -0.464205
  650717/5000000: episode: 7388, duration: 3.432s, episode steps: 105, steps per second: 31, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.381 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.001456, mean_absolute_error: 0.392312, mean_q: -0.463696
  650865/5000000: episode: 7389, duration: 4.349s, episode steps: 148, steps per second: 34, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.351 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001286, mean_absolute_error: 0.389757, mean_q: -0.461811
  650893/5000000: episode: 7390, duration: 0.939s, episode steps: 28, steps per second: 30, episode reward: -1.000,

  652245/5000000: episode: 7415, duration: 2.798s, episode steps: 90, steps per second: 32, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.567 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001677, mean_absolute_error: 0.395525, mean_q: -0.467043
  652311/5000000: episode: 7416, duration: 2.060s, episode steps: 66, steps per second: 32, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.545 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001479, mean_absolute_error: 0.395071, mean_q: -0.466590
  652336/5000000: episode: 7417, duration: 0.737s, episode steps: 25, steps per second: 34, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.520 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001777, mean_absolute_error: 0.393467, mean_q: -0.464703
  652362/5000000: episode: 7418, duration: 0.814s, episode steps: 26, steps per second: 32, episode reward: -1.000, me

  654333/5000000: episode: 7443, duration: 2.257s, episode steps: 71, steps per second: 31, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.676 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001084, mean_absolute_error: 0.390062, mean_q: -0.462061
  654438/5000000: episode: 7444, duration: 3.268s, episode steps: 105, steps per second: 32, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.295 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.001376, mean_absolute_error: 0.391489, mean_q: -0.463962
  654595/5000000: episode: 7445, duration: 5.108s, episode steps: 157, steps per second: 31, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.382 [0.000, 5.000], mean observation: 0.295 [0.000, 24.000], loss: 0.001358, mean_absolute_error: 0.389641, mean_q: -0.460268
  654623/5000000: episode: 7446, duration: 0.938s, episode steps: 28, steps per second: 30, episode reward: -1.000, 

  656242/5000000: episode: 7471, duration: 1.549s, episode steps: 51, steps per second: 33, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.765 [0.000, 5.000], mean observation: 0.234 [0.000, 24.000], loss: 0.001418, mean_absolute_error: 0.391148, mean_q: -0.462211
  656267/5000000: episode: 7472, duration: 0.630s, episode steps: 25, steps per second: 40, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.240 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.000732, mean_absolute_error: 0.386185, mean_q: -0.459906
  656300/5000000: episode: 7473, duration: 1.021s, episode steps: 33, steps per second: 32, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.485 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001378, mean_absolute_error: 0.388551, mean_q: -0.460317
  656325/5000000: episode: 7474, duration: 0.749s, episode steps: 25, steps per second: 33, episode reward: -1.000, me

  657756/5000000: episode: 7499, duration: 1.693s, episode steps: 55, steps per second: 32, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.527 [0.000, 5.000], mean observation: 0.235 [0.000, 24.000], loss: 0.002101, mean_absolute_error: 0.395024, mean_q: -0.466595
  657788/5000000: episode: 7500, duration: 0.886s, episode steps: 32, steps per second: 36, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.875 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.000981, mean_absolute_error: 0.393919, mean_q: -0.467871
  657846/5000000: episode: 7501, duration: 1.732s, episode steps: 58, steps per second: 33, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.586 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.002363, mean_absolute_error: 0.397172, mean_q: -0.467652
  657908/5000000: episode: 7502, duration: 1.803s, episode steps: 62, steps per second: 34, episode reward: -1.000, me

  659497/5000000: episode: 7527, duration: 2.542s, episode steps: 83, steps per second: 33, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.410 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001774, mean_absolute_error: 0.394441, mean_q: -0.464234
  659523/5000000: episode: 7528, duration: 0.868s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.154 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001218, mean_absolute_error: 0.389614, mean_q: -0.461818
  659577/5000000: episode: 7529, duration: 1.756s, episode steps: 54, steps per second: 31, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.944 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001812, mean_absolute_error: 0.393089, mean_q: -0.466380
  659604/5000000: episode: 7530, duration: 0.782s, episode steps: 27, steps per second: 35, episode reward: -1.000, me

  661293/5000000: episode: 7555, duration: 1.904s, episode steps: 65, steps per second: 34, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.431 [0.000, 5.000], mean observation: 0.264 [0.000, 24.000], loss: 0.001250, mean_absolute_error: 0.390590, mean_q: -0.463588
  661361/5000000: episode: 7556, duration: 2.008s, episode steps: 68, steps per second: 34, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.206 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.002233, mean_absolute_error: 0.394101, mean_q: -0.467198
  661492/5000000: episode: 7557, duration: 3.978s, episode steps: 131, steps per second: 33, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.664 [0.000, 5.000], mean observation: 0.264 [0.000, 24.000], loss: 0.001698, mean_absolute_error: 0.394996, mean_q: -0.467387
  661569/5000000: episode: 7558, duration: 2.388s, episode steps: 77, steps per second: 32, episode reward: -1.000, m

  662929/5000000: episode: 7583, duration: 0.888s, episode steps: 28, steps per second: 32, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 1.964 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.002056, mean_absolute_error: 0.395799, mean_q: -0.466038
  662958/5000000: episode: 7584, duration: 0.992s, episode steps: 29, steps per second: 29, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.103 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001094, mean_absolute_error: 0.395252, mean_q: -0.467428
  662988/5000000: episode: 7585, duration: 0.987s, episode steps: 30, steps per second: 30, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001334, mean_absolute_error: 0.391055, mean_q: -0.464815
  663019/5000000: episode: 7586, duration: 1.081s, episode steps: 31, steps per second: 29, episode reward: -1.000, me

  664707/5000000: episode: 7611, duration: 0.761s, episode steps: 26, steps per second: 34, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.233 [0.000, 24.000], loss: 0.001696, mean_absolute_error: 0.399840, mean_q: -0.467148
  664779/5000000: episode: 7612, duration: 2.351s, episode steps: 72, steps per second: 31, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.625 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 0.001290, mean_absolute_error: 0.394356, mean_q: -0.465358
  664866/5000000: episode: 7613, duration: 2.757s, episode steps: 87, steps per second: 32, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.299 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001526, mean_absolute_error: 0.397493, mean_q: -0.471519
  664946/5000000: episode: 7614, duration: 2.621s, episode steps: 80, steps per second: 31, episode reward: -1.000, me

  666181/5000000: episode: 7639, duration: 2.921s, episode steps: 92, steps per second: 32, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.304 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.002018, mean_absolute_error: 0.396238, mean_q: -0.467366
  666206/5000000: episode: 7640, duration: 0.806s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.080 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.000715, mean_absolute_error: 0.395302, mean_q: -0.467818
  666292/5000000: episode: 7641, duration: 2.580s, episode steps: 86, steps per second: 33, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.265 [0.000, 24.000], loss: 0.001134, mean_absolute_error: 0.392852, mean_q: -0.465983
  666389/5000000: episode: 7642, duration: 3.270s, episode steps: 97, steps per second: 30, episode reward: -1.000, me

  667766/5000000: episode: 7667, duration: 1.244s, episode steps: 38, steps per second: 31, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.816 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 0.001632, mean_absolute_error: 0.394711, mean_q: -0.466845
  667859/5000000: episode: 7668, duration: 2.932s, episode steps: 93, steps per second: 32, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.645 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001616, mean_absolute_error: 0.396634, mean_q: -0.468846
  667949/5000000: episode: 7669, duration: 2.668s, episode steps: 90, steps per second: 34, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.411 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.001289, mean_absolute_error: 0.393538, mean_q: -0.466064
  667987/5000000: episode: 7670, duration: 1.122s, episode steps: 38, steps per second: 34, episode reward: -1.000, me

  669033/5000000: episode: 7695, duration: 1.598s, episode steps: 51, steps per second: 32, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.784 [0.000, 5.000], mean observation: 0.233 [0.000, 24.000], loss: 0.001795, mean_absolute_error: 0.399015, mean_q: -0.471840
  669253/5000000: episode: 7696, duration: 6.776s, episode steps: 220, steps per second: 32, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.627 [0.000, 5.000], mean observation: 0.343 [0.000, 24.000], loss: 0.001384, mean_absolute_error: 0.398773, mean_q: -0.471753
  669278/5000000: episode: 7697, duration: 0.811s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.200 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001553, mean_absolute_error: 0.398505, mean_q: -0.473012
  669389/5000000: episode: 7698, duration: 3.516s, episode steps: 111, steps per second: 32, episode reward: -1.000, 

  670689/5000000: episode: 7723, duration: 1.281s, episode steps: 39, steps per second: 30, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.846 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 0.001503, mean_absolute_error: 0.391147, mean_q: -0.462178
  670742/5000000: episode: 7724, duration: 1.514s, episode steps: 53, steps per second: 35, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.189 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001906, mean_absolute_error: 0.391123, mean_q: -0.460948
  670929/5000000: episode: 7725, duration: 5.893s, episode steps: 187, steps per second: 32, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.358 [0.000, 5.000], mean observation: 0.270 [0.000, 24.000], loss: 0.001751, mean_absolute_error: 0.393054, mean_q: -0.463441
  670984/5000000: episode: 7726, duration: 1.754s, episode steps: 55, steps per second: 31, episode reward: -1.000, m

  672503/5000000: episode: 7751, duration: 3.746s, episode steps: 125, steps per second: 33, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.504 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 0.001619, mean_absolute_error: 0.387218, mean_q: -0.458015
  672530/5000000: episode: 7752, duration: 0.863s, episode steps: 27, steps per second: 31, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.926 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.000865, mean_absolute_error: 0.387204, mean_q: -0.457732
  672556/5000000: episode: 7753, duration: 0.758s, episode steps: 26, steps per second: 34, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.269 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.000893, mean_absolute_error: 0.383492, mean_q: -0.454443
  672668/5000000: episode: 7754, duration: 3.558s, episode steps: 112, steps per second: 31, episode reward: -1.000, 

  674286/5000000: episode: 7779, duration: 1.643s, episode steps: 56, steps per second: 34, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.357 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001526, mean_absolute_error: 0.390228, mean_q: -0.462577
  674315/5000000: episode: 7780, duration: 0.926s, episode steps: 29, steps per second: 31, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.345 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.002635, mean_absolute_error: 0.394213, mean_q: -0.464322
  674344/5000000: episode: 7781, duration: 0.940s, episode steps: 29, steps per second: 31, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 3.103 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.002018, mean_absolute_error: 0.395137, mean_q: -0.467230
  674373/5000000: episode: 7782, duration: 0.923s, episode steps: 29, steps per second: 31, episode reward: -1.000, me

  676056/5000000: episode: 7807, duration: 0.869s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.923 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.002410, mean_absolute_error: 0.394916, mean_q: -0.465576
  676087/5000000: episode: 7808, duration: 1.025s, episode steps: 31, steps per second: 30, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.774 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.002450, mean_absolute_error: 0.396184, mean_q: -0.466741
  676138/5000000: episode: 7809, duration: 1.622s, episode steps: 51, steps per second: 31, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.902 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001240, mean_absolute_error: 0.391186, mean_q: -0.461957
  676200/5000000: episode: 7810, duration: 1.991s, episode steps: 62, steps per second: 31, episode reward: -1.000, me

  677726/5000000: episode: 7835, duration: 0.879s, episode steps: 28, steps per second: 32, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 1.893 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.001938, mean_absolute_error: 0.392726, mean_q: -0.462484
  677752/5000000: episode: 7836, duration: 0.849s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.115 [1.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001563, mean_absolute_error: 0.392288, mean_q: -0.462150
  677834/5000000: episode: 7837, duration: 2.427s, episode steps: 82, steps per second: 34, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.720 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001753, mean_absolute_error: 0.392821, mean_q: -0.464663
  677924/5000000: episode: 7838, duration: 2.765s, episode steps: 90, steps per second: 33, episode reward: -1.000, me

  679375/5000000: episode: 7863, duration: 0.941s, episode steps: 30, steps per second: 32, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.933 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001666, mean_absolute_error: 0.389872, mean_q: -0.459977
  679411/5000000: episode: 7864, duration: 1.090s, episode steps: 36, steps per second: 33, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 0.001397, mean_absolute_error: 0.390063, mean_q: -0.461601
  679437/5000000: episode: 7865, duration: 0.747s, episode steps: 26, steps per second: 35, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.346 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001406, mean_absolute_error: 0.387361, mean_q: -0.457820
  679487/5000000: episode: 7866, duration: 1.504s, episode steps: 50, steps per second: 33, episode reward: -1.000, me

  680900/5000000: episode: 7891, duration: 0.859s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.154 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001603, mean_absolute_error: 0.387462, mean_q: -0.455184
  680927/5000000: episode: 7892, duration: 0.944s, episode steps: 27, steps per second: 29, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.444 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001721, mean_absolute_error: 0.384633, mean_q: -0.456106
  680959/5000000: episode: 7893, duration: 0.994s, episode steps: 32, steps per second: 32, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.375 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 0.001569, mean_absolute_error: 0.387138, mean_q: -0.459364
  681196/5000000: episode: 7894, duration: 7.286s, episode steps: 237, steps per second: 33, episode reward: -1.000, m

  682693/5000000: episode: 7919, duration: 1.174s, episode steps: 38, steps per second: 32, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.421 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 0.001951, mean_absolute_error: 0.384914, mean_q: -0.453593
  682799/5000000: episode: 7920, duration: 3.327s, episode steps: 106, steps per second: 32, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.245 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 0.001501, mean_absolute_error: 0.383461, mean_q: -0.451774
  682824/5000000: episode: 7921, duration: 0.753s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.040 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001489, mean_absolute_error: 0.383719, mean_q: -0.451968
  682866/5000000: episode: 7922, duration: 1.305s, episode steps: 42, steps per second: 32, episode reward: -1.000, m

  684587/5000000: episode: 7947, duration: 0.764s, episode steps: 27, steps per second: 35, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.296 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.002221, mean_absolute_error: 0.386636, mean_q: -0.457214
  684676/5000000: episode: 7948, duration: 2.863s, episode steps: 89, steps per second: 31, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.494 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001457, mean_absolute_error: 0.384973, mean_q: -0.454103
  684705/5000000: episode: 7949, duration: 0.895s, episode steps: 29, steps per second: 32, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.828 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.002247, mean_absolute_error: 0.386044, mean_q: -0.456847
  684763/5000000: episode: 7950, duration: 1.886s, episode steps: 58, steps per second: 31, episode reward: -1.000, me

  686496/5000000: episode: 7975, duration: 3.449s, episode steps: 102, steps per second: 30, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.275 [0.000, 5.000], mean observation: 0.282 [0.000, 24.000], loss: 0.001817, mean_absolute_error: 0.385926, mean_q: -0.456019
  686541/5000000: episode: 7976, duration: 1.400s, episode steps: 45, steps per second: 32, episode reward: -1.000, mean reward: -0.022 [-1.000, 0.000], mean action: 2.422 [0.000, 5.000], mean observation: 0.260 [0.000, 24.000], loss: 0.001342, mean_absolute_error: 0.385435, mean_q: -0.454391
  686722/5000000: episode: 7977, duration: 5.897s, episode steps: 181, steps per second: 31, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.635 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001784, mean_absolute_error: 0.386309, mean_q: -0.457131
  686747/5000000: episode: 7978, duration: 0.801s, episode steps: 25, steps per second: 31, episode reward: -1.000, 

  688754/5000000: episode: 8003, duration: 0.919s, episode steps: 27, steps per second: 29, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.296 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.002056, mean_absolute_error: 0.384507, mean_q: -0.453256
  688780/5000000: episode: 8004, duration: 0.843s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.962 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.000917, mean_absolute_error: 0.383118, mean_q: -0.451139
  688848/5000000: episode: 8005, duration: 2.325s, episode steps: 68, steps per second: 29, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.338 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.001311, mean_absolute_error: 0.380273, mean_q: -0.450312
  688899/5000000: episode: 8006, duration: 1.569s, episode steps: 51, steps per second: 33, episode reward: -1.000, me

  690472/5000000: episode: 8031, duration: 0.748s, episode steps: 27, steps per second: 36, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.630 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.000883, mean_absolute_error: 0.381694, mean_q: -0.452792
  690499/5000000: episode: 8032, duration: 0.866s, episode steps: 27, steps per second: 31, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.444 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001060, mean_absolute_error: 0.377183, mean_q: -0.447104
  690545/5000000: episode: 8033, duration: 1.445s, episode steps: 46, steps per second: 32, episode reward: -1.000, mean reward: -0.022 [-1.000, 0.000], mean action: 2.457 [0.000, 5.000], mean observation: 0.260 [0.000, 24.000], loss: 0.001946, mean_absolute_error: 0.381513, mean_q: -0.449273
  690572/5000000: episode: 8034, duration: 0.926s, episode steps: 27, steps per second: 29, episode reward: -1.000, me

  692170/5000000: episode: 8059, duration: 2.262s, episode steps: 69, steps per second: 31, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.464 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 0.001556, mean_absolute_error: 0.384292, mean_q: -0.454979
  692225/5000000: episode: 8060, duration: 1.765s, episode steps: 55, steps per second: 31, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.745 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001438, mean_absolute_error: 0.383866, mean_q: -0.454147
  692254/5000000: episode: 8061, duration: 0.909s, episode steps: 29, steps per second: 32, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.552 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001127, mean_absolute_error: 0.382758, mean_q: -0.453412
  692288/5000000: episode: 8062, duration: 1.077s, episode steps: 34, steps per second: 32, episode reward: -1.000, me

  694100/5000000: episode: 8087, duration: 0.849s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.769 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001077, mean_absolute_error: 0.387560, mean_q: -0.457210
  694135/5000000: episode: 8088, duration: 1.139s, episode steps: 35, steps per second: 31, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.629 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 0.002235, mean_absolute_error: 0.386204, mean_q: -0.456783
  694235/5000000: episode: 8089, duration: 2.866s, episode steps: 100, steps per second: 35, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.670 [0.000, 5.000], mean observation: 0.216 [0.000, 24.000], loss: 0.001118, mean_absolute_error: 0.384629, mean_q: -0.455198
  694263/5000000: episode: 8090, duration: 0.938s, episode steps: 28, steps per second: 30, episode reward: -1.000, m

  695661/5000000: episode: 8115, duration: 0.882s, episode steps: 26, steps per second: 29, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.269 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001936, mean_absolute_error: 0.389909, mean_q: -0.459324
  695718/5000000: episode: 8116, duration: 1.756s, episode steps: 57, steps per second: 32, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.702 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001613, mean_absolute_error: 0.388195, mean_q: -0.458586
  695745/5000000: episode: 8117, duration: 0.910s, episode steps: 27, steps per second: 30, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.222 [0.000, 5.000], mean observation: 0.235 [0.000, 24.000], loss: 0.001536, mean_absolute_error: 0.386428, mean_q: -0.458145
  695835/5000000: episode: 8118, duration: 2.789s, episode steps: 90, steps per second: 32, episode reward: -1.000, me

  696988/5000000: episode: 8143, duration: 1.480s, episode steps: 53, steps per second: 36, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.528 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001454, mean_absolute_error: 0.390634, mean_q: -0.461204
  697015/5000000: episode: 8144, duration: 0.940s, episode steps: 27, steps per second: 29, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001343, mean_absolute_error: 0.388738, mean_q: -0.461874
  697203/5000000: episode: 8145, duration: 6.124s, episode steps: 188, steps per second: 31, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.628 [0.000, 5.000], mean observation: 0.306 [0.000, 24.000], loss: 0.001566, mean_absolute_error: 0.390715, mean_q: -0.463210
  697266/5000000: episode: 8146, duration: 1.959s, episode steps: 63, steps per second: 32, episode reward: -1.000, m

  698481/5000000: episode: 8171, duration: 0.966s, episode steps: 28, steps per second: 29, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 3.464 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001931, mean_absolute_error: 0.390905, mean_q: -0.463539
  698540/5000000: episode: 8172, duration: 1.670s, episode steps: 59, steps per second: 35, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.746 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001294, mean_absolute_error: 0.392215, mean_q: -0.465023
  698566/5000000: episode: 8173, duration: 0.753s, episode steps: 26, steps per second: 35, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.500 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.002400, mean_absolute_error: 0.395505, mean_q: -0.466143
  698624/5000000: episode: 8174, duration: 1.769s, episode steps: 58, steps per second: 33, episode reward: -1.000, me

  699853/5000000: episode: 8199, duration: 3.629s, episode steps: 128, steps per second: 35, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.367 [0.000, 5.000], mean observation: 0.262 [0.000, 24.000], loss: 0.001204, mean_absolute_error: 0.395713, mean_q: -0.469290
  699970/5000000: episode: 8200, duration: 3.693s, episode steps: 117, steps per second: 32, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.735 [0.000, 5.000], mean observation: 0.281 [0.000, 24.000], loss: 0.001436, mean_absolute_error: 0.395454, mean_q: -0.467981
  700100/5000000: episode: 8201, duration: 3.971s, episode steps: 130, steps per second: 33, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.554 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.000989, mean_absolute_error: 0.393949, mean_q: -0.466732
  700162/5000000: episode: 8202, duration: 1.774s, episode steps: 62, steps per second: 35, episode reward: -1.000,

  701909/5000000: episode: 8227, duration: 0.956s, episode steps: 28, steps per second: 29, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.250 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001633, mean_absolute_error: 0.389445, mean_q: -0.458845
  701935/5000000: episode: 8228, duration: 0.860s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.538 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001751, mean_absolute_error: 0.389875, mean_q: -0.462499
  702084/5000000: episode: 8229, duration: 4.864s, episode steps: 149, steps per second: 31, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.383 [0.000, 5.000], mean observation: 0.260 [0.000, 24.000], loss: 0.001703, mean_absolute_error: 0.390968, mean_q: -0.461907
  702349/5000000: episode: 8230, duration: 8.538s, episode steps: 265, steps per second: 31, episode reward: -1.000, 

  704235/5000000: episode: 8255, duration: 0.925s, episode steps: 31, steps per second: 34, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.677 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001616, mean_absolute_error: 0.388337, mean_q: -0.459557
  704289/5000000: episode: 8256, duration: 1.712s, episode steps: 54, steps per second: 32, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.963 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001438, mean_absolute_error: 0.387886, mean_q: -0.460059
  704343/5000000: episode: 8257, duration: 1.576s, episode steps: 54, steps per second: 34, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.778 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.001361, mean_absolute_error: 0.388658, mean_q: -0.459695
  704368/5000000: episode: 8258, duration: 0.768s, episode steps: 25, steps per second: 33, episode reward: -1.000, me

  705669/5000000: episode: 8283, duration: 0.816s, episode steps: 27, steps per second: 33, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.370 [0.000, 5.000], mean observation: 0.235 [0.000, 24.000], loss: 0.002201, mean_absolute_error: 0.388373, mean_q: -0.459390
  705721/5000000: episode: 8284, duration: 1.660s, episode steps: 52, steps per second: 31, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.442 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001746, mean_absolute_error: 0.390263, mean_q: -0.461768
  705842/5000000: episode: 8285, duration: 3.574s, episode steps: 121, steps per second: 34, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.901 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001683, mean_absolute_error: 0.391183, mean_q: -0.463157
  705875/5000000: episode: 8286, duration: 1.062s, episode steps: 33, steps per second: 31, episode reward: -1.000, m

  707048/5000000: episode: 8311, duration: 21.145s, episode steps: 104, steps per second: 5, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.279 [0.000, 5.000], mean observation: 0.272 [0.000, 24.000], loss: 0.001738, mean_absolute_error: 0.393726, mean_q: -0.465120
  707077/5000000: episode: 8312, duration: 0.914s, episode steps: 29, steps per second: 32, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.034 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.000785, mean_absolute_error: 0.392479, mean_q: -0.462738
  707191/5000000: episode: 8313, duration: 3.817s, episode steps: 114, steps per second: 30, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.588 [0.000, 5.000], mean observation: 0.208 [0.000, 24.000], loss: 0.001679, mean_absolute_error: 0.393784, mean_q: -0.465621
  707216/5000000: episode: 8314, duration: 0.759s, episode steps: 25, steps per second: 33, episode reward: -1.000, 

  708863/5000000: episode: 8339, duration: 1.535s, episode steps: 51, steps per second: 33, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.275 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001402, mean_absolute_error: 0.394385, mean_q: -0.467173
  708920/5000000: episode: 8340, duration: 1.588s, episode steps: 57, steps per second: 36, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.544 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001869, mean_absolute_error: 0.396369, mean_q: -0.467569
  708980/5000000: episode: 8341, duration: 1.720s, episode steps: 60, steps per second: 35, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.533 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.000527, mean_absolute_error: 0.390344, mean_q: -0.464889
  709007/5000000: episode: 8342, duration: 1.050s, episode steps: 27, steps per second: 26, episode reward: -1.000, me

  710583/5000000: episode: 8367, duration: 1.802s, episode steps: 55, steps per second: 31, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.655 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001699, mean_absolute_error: 0.394506, mean_q: -0.466135
  710641/5000000: episode: 8368, duration: 1.744s, episode steps: 58, steps per second: 33, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.293 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001306, mean_absolute_error: 0.393214, mean_q: -0.465291
  710667/5000000: episode: 8369, duration: 0.831s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.308 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.000683, mean_absolute_error: 0.390785, mean_q: -0.464940
  710696/5000000: episode: 8370, duration: 0.970s, episode steps: 29, steps per second: 30, episode reward: -1.000, me

  712064/5000000: episode: 8395, duration: 2.857s, episode steps: 89, steps per second: 31, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.674 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.001306, mean_absolute_error: 0.392686, mean_q: -0.463208
  712093/5000000: episode: 8396, duration: 0.994s, episode steps: 29, steps per second: 29, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.931 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001706, mean_absolute_error: 0.391215, mean_q: -0.461512
  712149/5000000: episode: 8397, duration: 1.846s, episode steps: 56, steps per second: 30, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.839 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001761, mean_absolute_error: 0.393391, mean_q: -0.464822
  712178/5000000: episode: 8398, duration: 0.893s, episode steps: 29, steps per second: 32, episode reward: -1.000, me

  713526/5000000: episode: 8423, duration: 4.885s, episode steps: 148, steps per second: 30, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.527 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001494, mean_absolute_error: 0.391122, mean_q: -0.463382
  713553/5000000: episode: 8424, duration: 0.826s, episode steps: 27, steps per second: 33, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.519 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.002015, mean_absolute_error: 0.391951, mean_q: -0.462289
  713606/5000000: episode: 8425, duration: 1.616s, episode steps: 53, steps per second: 33, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 3.094 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.002101, mean_absolute_error: 0.394971, mean_q: -0.466992
  713633/5000000: episode: 8426, duration: 0.849s, episode steps: 27, steps per second: 32, episode reward: -1.000, m

  714790/5000000: episode: 8451, duration: 0.761s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001267, mean_absolute_error: 0.391785, mean_q: -0.458665
  714816/5000000: episode: 8452, duration: 0.866s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.154 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.000716, mean_absolute_error: 0.390198, mean_q: -0.460106
  714842/5000000: episode: 8453, duration: 0.878s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.808 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001214, mean_absolute_error: 0.387500, mean_q: -0.461355
  714990/5000000: episode: 8454, duration: 4.780s, episode steps: 148, steps per second: 31, episode reward: -1.000, m

  716851/5000000: episode: 8479, duration: 2.427s, episode steps: 79, steps per second: 33, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.544 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 0.001255, mean_absolute_error: 0.390227, mean_q: -0.462150
  716884/5000000: episode: 8480, duration: 1.005s, episode steps: 33, steps per second: 33, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 1.818 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001098, mean_absolute_error: 0.389673, mean_q: -0.460713
  716942/5000000: episode: 8481, duration: 1.742s, episode steps: 58, steps per second: 33, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.345 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001412, mean_absolute_error: 0.388861, mean_q: -0.460494
  716969/5000000: episode: 8482, duration: 0.852s, episode steps: 27, steps per second: 32, episode reward: -1.000, me

  718675/5000000: episode: 8507, duration: 4.908s, episode steps: 154, steps per second: 31, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.494 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001392, mean_absolute_error: 0.384039, mean_q: -0.454864
  718790/5000000: episode: 8508, duration: 3.939s, episode steps: 115, steps per second: 29, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.461 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001728, mean_absolute_error: 0.385629, mean_q: -0.455353
  718911/5000000: episode: 8509, duration: 3.884s, episode steps: 121, steps per second: 31, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.281 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001812, mean_absolute_error: 0.385191, mean_q: -0.453955
  718957/5000000: episode: 8510, duration: 1.573s, episode steps: 46, steps per second: 29, episode reward: -1.000,

  720654/5000000: episode: 8535, duration: 0.808s, episode steps: 28, steps per second: 35, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.429 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.002811, mean_absolute_error: 0.387800, mean_q: -0.454451
  720685/5000000: episode: 8536, duration: 1.002s, episode steps: 31, steps per second: 31, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 2.710 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.001493, mean_absolute_error: 0.384196, mean_q: -0.451443
  720722/5000000: episode: 8537, duration: 1.171s, episode steps: 37, steps per second: 32, episode reward: -1.000, mean reward: -0.027 [-1.000, 0.000], mean action: 2.622 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 0.001414, mean_absolute_error: 0.384863, mean_q: -0.452202
  720910/5000000: episode: 8538, duration: 5.356s, episode steps: 188, steps per second: 35, episode reward: -1.000, m

  722242/5000000: episode: 8563, duration: 1.677s, episode steps: 56, steps per second: 33, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.821 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001701, mean_absolute_error: 0.391981, mean_q: -0.461556
  722273/5000000: episode: 8564, duration: 1.006s, episode steps: 31, steps per second: 31, episode reward: -1.000, mean reward: -0.032 [-1.000, 0.000], mean action: 1.903 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001619, mean_absolute_error: 0.389989, mean_q: -0.461130
  722305/5000000: episode: 8565, duration: 1.035s, episode steps: 32, steps per second: 31, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.406 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 0.002770, mean_absolute_error: 0.386620, mean_q: -0.457020
  722378/5000000: episode: 8566, duration: 2.186s, episode steps: 73, steps per second: 33, episode reward: -1.000, me

  723643/5000000: episode: 8591, duration: 6.019s, episode steps: 197, steps per second: 33, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.452 [0.000, 5.000], mean observation: 0.292 [0.000, 24.000], loss: 0.001562, mean_absolute_error: 0.387726, mean_q: -0.459007
  723671/5000000: episode: 8592, duration: 0.947s, episode steps: 28, steps per second: 30, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.607 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001634, mean_absolute_error: 0.386475, mean_q: -0.456676
  723696/5000000: episode: 8593, duration: 0.758s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.002203, mean_absolute_error: 0.389793, mean_q: -0.458721
  723723/5000000: episode: 8594, duration: 0.945s, episode steps: 27, steps per second: 29, episode reward: -1.000, m

  724972/5000000: episode: 8619, duration: 0.748s, episode steps: 26, steps per second: 35, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.077 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001933, mean_absolute_error: 0.388289, mean_q: -0.460675
  724997/5000000: episode: 8620, duration: 0.776s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001114, mean_absolute_error: 0.386817, mean_q: -0.457196
  725048/5000000: episode: 8621, duration: 1.543s, episode steps: 51, steps per second: 33, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.804 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.000988, mean_absolute_error: 0.384917, mean_q: -0.456667
  725208/5000000: episode: 8622, duration: 5.160s, episode steps: 160, steps per second: 31, episode reward: -1.000, m

  726440/5000000: episode: 8647, duration: 3.605s, episode steps: 119, steps per second: 33, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.588 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001776, mean_absolute_error: 0.383889, mean_q: -0.453640
  726500/5000000: episode: 8648, duration: 2.059s, episode steps: 60, steps per second: 29, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.533 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001478, mean_absolute_error: 0.382243, mean_q: -0.452593
  726622/5000000: episode: 8649, duration: 3.651s, episode steps: 122, steps per second: 33, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.451 [0.000, 5.000], mean observation: 0.224 [0.000, 24.000], loss: 0.001579, mean_absolute_error: 0.381988, mean_q: -0.450892
  726650/5000000: episode: 8650, duration: 0.951s, episode steps: 28, steps per second: 29, episode reward: -1.000, 

  728305/5000000: episode: 8675, duration: 1.218s, episode steps: 36, steps per second: 30, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.861 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 0.001327, mean_absolute_error: 0.379678, mean_q: -0.447199
  728338/5000000: episode: 8676, duration: 0.985s, episode steps: 33, steps per second: 33, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 0.002722, mean_absolute_error: 0.379535, mean_q: -0.447680
  728374/5000000: episode: 8677, duration: 1.053s, episode steps: 36, steps per second: 34, episode reward: -1.000, mean reward: -0.028 [-1.000, 0.000], mean action: 2.306 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.001973, mean_absolute_error: 0.379269, mean_q: -0.447224
  728560/5000000: episode: 8678, duration: 6.169s, episode steps: 186, steps per second: 30, episode reward: -1.000, m

  729725/5000000: episode: 8703, duration: 1.803s, episode steps: 57, steps per second: 32, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.175 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001563, mean_absolute_error: 0.380961, mean_q: -0.451379
  729821/5000000: episode: 8704, duration: 3.044s, episode steps: 96, steps per second: 32, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.479 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001524, mean_absolute_error: 0.381075, mean_q: -0.449455
  729851/5000000: episode: 8705, duration: 1.034s, episode steps: 30, steps per second: 29, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001871, mean_absolute_error: 0.382852, mean_q: -0.451521
  729879/5000000: episode: 8706, duration: 1.010s, episode steps: 28, steps per second: 28, episode reward: -1.000, me

  731674/5000000: episode: 8731, duration: 0.744s, episode steps: 25, steps per second: 34, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.040 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.000759, mean_absolute_error: 0.381304, mean_q: -0.451150
  731700/5000000: episode: 8732, duration: 0.874s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.002518, mean_absolute_error: 0.382328, mean_q: -0.451545
  731806/5000000: episode: 8733, duration: 3.364s, episode steps: 106, steps per second: 32, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.340 [0.000, 5.000], mean observation: 0.258 [0.000, 24.000], loss: 0.002143, mean_absolute_error: 0.381619, mean_q: -0.448570
  731847/5000000: episode: 8734, duration: 1.297s, episode steps: 41, steps per second: 32, episode reward: -1.000, m

  733263/5000000: episode: 8759, duration: 0.770s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.232 [0.000, 24.000], loss: 0.001666, mean_absolute_error: 0.387432, mean_q: -0.456877
  733291/5000000: episode: 8760, duration: 0.907s, episode steps: 28, steps per second: 31, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.571 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001172, mean_absolute_error: 0.384948, mean_q: -0.454792
  733406/5000000: episode: 8761, duration: 3.816s, episode steps: 115, steps per second: 30, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.443 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001457, mean_absolute_error: 0.384168, mean_q: -0.454953
  733488/5000000: episode: 8762, duration: 2.633s, episode steps: 82, steps per second: 31, episode reward: -1.000, m

  734843/5000000: episode: 8787, duration: 1.737s, episode steps: 63, steps per second: 36, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.603 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001905, mean_absolute_error: 0.385531, mean_q: -0.456421
  734903/5000000: episode: 8788, duration: 4.648s, episode steps: 60, steps per second: 13, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.467 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001465, mean_absolute_error: 0.386755, mean_q: -0.457273
  734938/5000000: episode: 8789, duration: 1.067s, episode steps: 35, steps per second: 33, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.286 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.002234, mean_absolute_error: 0.388600, mean_q: -0.457856
  734998/5000000: episode: 8790, duration: 1.799s, episode steps: 60, steps per second: 33, episode reward: -1.000, me

  736732/5000000: episode: 8815, duration: 3.130s, episode steps: 97, steps per second: 31, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.680 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001735, mean_absolute_error: 0.390505, mean_q: -0.461185
  736765/5000000: episode: 8816, duration: 1.006s, episode steps: 33, steps per second: 33, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.515 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.003208, mean_absolute_error: 0.390713, mean_q: -0.461635
  736918/5000000: episode: 8817, duration: 4.905s, episode steps: 153, steps per second: 31, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.529 [0.000, 5.000], mean observation: 0.280 [0.000, 24.000], loss: 0.001624, mean_absolute_error: 0.391781, mean_q: -0.463898
  736956/5000000: episode: 8818, duration: 1.172s, episode steps: 38, steps per second: 32, episode reward: -1.000, m

  738329/5000000: episode: 8843, duration: 0.863s, episode steps: 25, steps per second: 29, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.240 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001605, mean_absolute_error: 0.392410, mean_q: -0.462832
  738418/5000000: episode: 8844, duration: 2.650s, episode steps: 89, steps per second: 34, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.697 [0.000, 5.000], mean observation: 0.265 [0.000, 24.000], loss: 0.001581, mean_absolute_error: 0.391409, mean_q: -0.462143
  738506/5000000: episode: 8845, duration: 2.435s, episode steps: 88, steps per second: 36, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.807 [0.000, 5.000], mean observation: 0.261 [0.000, 24.000], loss: 0.001235, mean_absolute_error: 0.389117, mean_q: -0.461373
  738568/5000000: episode: 8846, duration: 1.860s, episode steps: 62, steps per second: 33, episode reward: -1.000, me

  740113/5000000: episode: 8871, duration: 2.569s, episode steps: 87, steps per second: 34, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.483 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001290, mean_absolute_error: 0.391921, mean_q: -0.464653
  740165/5000000: episode: 8872, duration: 1.603s, episode steps: 52, steps per second: 32, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.250 [0.000, 5.000], mean observation: 0.273 [0.000, 24.000], loss: 0.001644, mean_absolute_error: 0.391870, mean_q: -0.463615
  740222/5000000: episode: 8873, duration: 1.742s, episode steps: 57, steps per second: 33, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.246 [0.000, 5.000], mean observation: 0.282 [0.000, 24.000], loss: 0.001583, mean_absolute_error: 0.391715, mean_q: -0.460774
  740356/5000000: episode: 8874, duration: 4.301s, episode steps: 134, steps per second: 31, episode reward: -1.000, m

  742279/5000000: episode: 8899, duration: 1.824s, episode steps: 59, steps per second: 32, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.356 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 0.001663, mean_absolute_error: 0.393789, mean_q: -0.465526
  742309/5000000: episode: 8900, duration: 0.955s, episode steps: 30, steps per second: 31, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.300 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.001199, mean_absolute_error: 0.391455, mean_q: -0.461562
  742339/5000000: episode: 8901, duration: 0.883s, episode steps: 30, steps per second: 34, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.333 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.001648, mean_absolute_error: 0.390760, mean_q: -0.461990
  742374/5000000: episode: 8902, duration: 1.176s, episode steps: 35, steps per second: 30, episode reward: -1.000, me

  744139/5000000: episode: 8927, duration: 1.057s, episode steps: 29, steps per second: 27, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.828 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.002000, mean_absolute_error: 0.392807, mean_q: -0.465704
  744227/5000000: episode: 8928, duration: 2.802s, episode steps: 88, steps per second: 31, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.364 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001478, mean_absolute_error: 0.392374, mean_q: -0.463611
  744255/5000000: episode: 8929, duration: 0.913s, episode steps: 28, steps per second: 31, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.536 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.002373, mean_absolute_error: 0.397329, mean_q: -0.469114
  744295/5000000: episode: 8930, duration: 1.320s, episode steps: 40, steps per second: 30, episode reward: -1.000, me

  745654/5000000: episode: 8955, duration: 6.101s, episode steps: 219, steps per second: 36, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.429 [0.000, 5.000], mean observation: 0.292 [0.000, 24.000], loss: 0.001610, mean_absolute_error: 0.392064, mean_q: -0.461952
  745866/5000000: episode: 8956, duration: 6.892s, episode steps: 212, steps per second: 31, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.623 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001493, mean_absolute_error: 0.390332, mean_q: -0.461967
  745894/5000000: episode: 8957, duration: 0.964s, episode steps: 28, steps per second: 29, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.893 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.002264, mean_absolute_error: 0.391391, mean_q: -0.460551
  745987/5000000: episode: 8958, duration: 2.793s, episode steps: 93, steps per second: 33, episode reward: -1.000, 

  747505/5000000: episode: 8983, duration: 3.202s, episode steps: 114, steps per second: 36, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.228 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.001812, mean_absolute_error: 0.382127, mean_q: -0.450871
  747591/5000000: episode: 8984, duration: 2.401s, episode steps: 86, steps per second: 36, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.628 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001857, mean_absolute_error: 0.383662, mean_q: -0.453929
  747620/5000000: episode: 8985, duration: 0.917s, episode steps: 29, steps per second: 32, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.690 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.002086, mean_absolute_error: 0.384069, mean_q: -0.454299
  747649/5000000: episode: 8986, duration: 0.921s, episode steps: 29, steps per second: 31, episode reward: -1.000, m

  749856/5000000: episode: 9011, duration: 4.856s, episode steps: 148, steps per second: 30, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.324 [0.000, 5.000], mean observation: 0.262 [0.000, 24.000], loss: 0.001415, mean_absolute_error: 0.383241, mean_q: -0.455349
  749885/5000000: episode: 9012, duration: 0.942s, episode steps: 29, steps per second: 31, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.621 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.000967, mean_absolute_error: 0.382812, mean_q: -0.454972
  749947/5000000: episode: 9013, duration: 1.945s, episode steps: 62, steps per second: 32, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.597 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001501, mean_absolute_error: 0.383662, mean_q: -0.453364
  750012/5000000: episode: 9014, duration: 2.066s, episode steps: 65, steps per second: 31, episode reward: -1.000, m

  751381/5000000: episode: 9039, duration: 5.276s, episode steps: 164, steps per second: 31, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.482 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001695, mean_absolute_error: 0.385558, mean_q: -0.455258
  751411/5000000: episode: 9040, duration: 1.056s, episode steps: 30, steps per second: 28, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.433 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001706, mean_absolute_error: 0.385382, mean_q: -0.453931
  751493/5000000: episode: 9041, duration: 2.578s, episode steps: 82, steps per second: 32, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.780 [0.000, 5.000], mean observation: 0.305 [0.000, 24.000], loss: 0.001142, mean_absolute_error: 0.382230, mean_q: -0.452808
  751573/5000000: episode: 9042, duration: 2.428s, episode steps: 80, steps per second: 33, episode reward: -1.000, m

  753224/5000000: episode: 9067, duration: 3.655s, episode steps: 116, steps per second: 32, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.466 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 0.001167, mean_absolute_error: 0.382609, mean_q: -0.453546
  753311/5000000: episode: 9068, duration: 2.771s, episode steps: 87, steps per second: 31, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.460 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.000865, mean_absolute_error: 0.380072, mean_q: -0.451715
  753378/5000000: episode: 9069, duration: 2.123s, episode steps: 67, steps per second: 32, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.478 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001196, mean_absolute_error: 0.379200, mean_q: -0.451014
  753404/5000000: episode: 9070, duration: 0.840s, episode steps: 26, steps per second: 31, episode reward: -1.000, m

  754820/5000000: episode: 9095, duration: 1.954s, episode steps: 57, steps per second: 29, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.263 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001470, mean_absolute_error: 0.384874, mean_q: -0.453109
  754845/5000000: episode: 9096, duration: 0.814s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.880 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001494, mean_absolute_error: 0.381928, mean_q: -0.450750
  754873/5000000: episode: 9097, duration: 0.874s, episode steps: 28, steps per second: 32, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.036 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001016, mean_absolute_error: 0.380495, mean_q: -0.450114
  755001/5000000: episode: 9098, duration: 4.066s, episode steps: 128, steps per second: 31, episode reward: -1.000, m

  756845/5000000: episode: 9123, duration: 2.288s, episode steps: 66, steps per second: 29, episode reward: -1.000, mean reward: -0.015 [-1.000, 0.000], mean action: 2.485 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.001825, mean_absolute_error: 0.386525, mean_q: -0.456081
  756915/5000000: episode: 9124, duration: 2.361s, episode steps: 70, steps per second: 30, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.514 [0.000, 5.000], mean observation: 0.253 [0.000, 24.000], loss: 0.001251, mean_absolute_error: 0.385358, mean_q: -0.455887
  757071/5000000: episode: 9125, duration: 5.243s, episode steps: 156, steps per second: 30, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.564 [0.000, 5.000], mean observation: 0.223 [0.000, 24.000], loss: 0.001108, mean_absolute_error: 0.382375, mean_q: -0.453375
  757099/5000000: episode: 9126, duration: 0.865s, episode steps: 28, steps per second: 32, episode reward: -1.000, m

  758677/5000000: episode: 9151, duration: 3.624s, episode steps: 117, steps per second: 32, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.393 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001486, mean_absolute_error: 0.382552, mean_q: -0.453264
  758737/5000000: episode: 9152, duration: 1.926s, episode steps: 60, steps per second: 31, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.933 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.001567, mean_absolute_error: 0.381715, mean_q: -0.451687
  758762/5000000: episode: 9153, duration: 0.873s, episode steps: 25, steps per second: 29, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.280 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.003562, mean_absolute_error: 0.385388, mean_q: -0.454390
  758788/5000000: episode: 9154, duration: 0.773s, episode steps: 26, steps per second: 34, episode reward: -1.000, m

  760284/5000000: episode: 9179, duration: 0.840s, episode steps: 25, steps per second: 30, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.480 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001112, mean_absolute_error: 0.386230, mean_q: -0.454012
  760309/5000000: episode: 9180, duration: 0.926s, episode steps: 25, steps per second: 27, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.160 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001670, mean_absolute_error: 0.385253, mean_q: -0.453566
  760473/5000000: episode: 9181, duration: 5.311s, episode steps: 164, steps per second: 31, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.323 [0.000, 5.000], mean observation: 0.266 [0.000, 24.000], loss: 0.001719, mean_absolute_error: 0.386128, mean_q: -0.456768
  760499/5000000: episode: 9182, duration: 0.883s, episode steps: 26, steps per second: 29, episode reward: -1.000, m

  762152/5000000: episode: 9207, duration: 0.888s, episode steps: 28, steps per second: 32, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.429 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.000801, mean_absolute_error: 0.392293, mean_q: -0.466635
  762274/5000000: episode: 9208, duration: 3.851s, episode steps: 122, steps per second: 32, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.525 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001682, mean_absolute_error: 0.394944, mean_q: -0.467067
  762300/5000000: episode: 9209, duration: 0.857s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.885 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001709, mean_absolute_error: 0.394108, mean_q: -0.467013
  762326/5000000: episode: 9210, duration: 0.870s, episode steps: 26, steps per second: 30, episode reward: -1.000, m

  764064/5000000: episode: 9235, duration: 0.952s, episode steps: 30, steps per second: 31, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.867 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.001061, mean_absolute_error: 0.390268, mean_q: -0.460593
  764092/5000000: episode: 9236, duration: 0.914s, episode steps: 28, steps per second: 31, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.214 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001916, mean_absolute_error: 0.393424, mean_q: -0.463096
  764178/5000000: episode: 9237, duration: 2.571s, episode steps: 86, steps per second: 33, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.442 [0.000, 5.000], mean observation: 0.273 [0.000, 24.000], loss: 0.001059, mean_absolute_error: 0.389505, mean_q: -0.460410
  764206/5000000: episode: 9238, duration: 0.902s, episode steps: 28, steps per second: 31, episode reward: -1.000, me

  766140/5000000: episode: 9263, duration: 2.402s, episode steps: 78, steps per second: 32, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.192 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 0.001710, mean_absolute_error: 0.387617, mean_q: -0.459687
  766197/5000000: episode: 9264, duration: 1.873s, episode steps: 57, steps per second: 30, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.579 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001443, mean_absolute_error: 0.390192, mean_q: -0.460443
  766226/5000000: episode: 9265, duration: 0.992s, episode steps: 29, steps per second: 29, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.517 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.001563, mean_absolute_error: 0.388978, mean_q: -0.460190
  766314/5000000: episode: 9266, duration: 2.530s, episode steps: 88, steps per second: 35, episode reward: -1.000, me

  767540/5000000: episode: 9291, duration: 1.674s, episode steps: 55, steps per second: 33, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.145 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.001312, mean_absolute_error: 0.390132, mean_q: -0.463484
  767676/5000000: episode: 9292, duration: 4.406s, episode steps: 136, steps per second: 31, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.529 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.001644, mean_absolute_error: 0.390168, mean_q: -0.463576
  767773/5000000: episode: 9293, duration: 2.680s, episode steps: 97, steps per second: 36, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.392 [0.000, 5.000], mean observation: 0.266 [0.000, 24.000], loss: 0.001853, mean_absolute_error: 0.392998, mean_q: -0.462289
  767799/5000000: episode: 9294, duration: 0.920s, episode steps: 26, steps per second: 28, episode reward: -1.000, m

  769371/5000000: episode: 9319, duration: 0.916s, episode steps: 33, steps per second: 36, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.515 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 0.001112, mean_absolute_error: 0.389808, mean_q: -0.461909
  769460/5000000: episode: 9320, duration: 2.702s, episode steps: 89, steps per second: 33, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.685 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001619, mean_absolute_error: 0.391087, mean_q: -0.463072
  769485/5000000: episode: 9321, duration: 0.728s, episode steps: 25, steps per second: 34, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.920 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001070, mean_absolute_error: 0.391937, mean_q: -0.465568
  769573/5000000: episode: 9322, duration: 2.754s, episode steps: 88, steps per second: 32, episode reward: -1.000, me

  770946/5000000: episode: 9347, duration: 0.867s, episode steps: 26, steps per second: 30, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.192 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001222, mean_absolute_error: 0.388904, mean_q: -0.461606
  771005/5000000: episode: 9348, duration: 1.807s, episode steps: 59, steps per second: 33, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.475 [0.000, 5.000], mean observation: 0.267 [0.000, 24.000], loss: 0.001309, mean_absolute_error: 0.388604, mean_q: -0.460708
  771039/5000000: episode: 9349, duration: 1.045s, episode steps: 34, steps per second: 33, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.765 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.002259, mean_absolute_error: 0.392110, mean_q: -0.461019
  771093/5000000: episode: 9350, duration: 1.821s, episode steps: 54, steps per second: 30, episode reward: -1.000, me

  772492/5000000: episode: 9375, duration: 1.824s, episode steps: 58, steps per second: 32, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.638 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001576, mean_absolute_error: 0.387721, mean_q: -0.459079
  772519/5000000: episode: 9376, duration: 0.917s, episode steps: 27, steps per second: 29, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.481 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.002088, mean_absolute_error: 0.385968, mean_q: -0.458211
  772580/5000000: episode: 9377, duration: 1.845s, episode steps: 61, steps per second: 33, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 3.148 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.001817, mean_absolute_error: 0.385885, mean_q: -0.453366
  772608/5000000: episode: 9378, duration: 0.798s, episode steps: 28, steps per second: 35, episode reward: -1.000, me

  774198/5000000: episode: 9403, duration: 2.521s, episode steps: 84, steps per second: 33, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.512 [0.000, 5.000], mean observation: 0.259 [0.000, 24.000], loss: 0.001262, mean_absolute_error: 0.385924, mean_q: -0.456023
  774351/5000000: episode: 9404, duration: 4.623s, episode steps: 153, steps per second: 33, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.654 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 0.001985, mean_absolute_error: 0.388269, mean_q: -0.458781
  774423/5000000: episode: 9405, duration: 2.257s, episode steps: 72, steps per second: 32, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.653 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 0.002007, mean_absolute_error: 0.386202, mean_q: -0.454363
  774450/5000000: episode: 9406, duration: 0.846s, episode steps: 27, steps per second: 32, episode reward: -1.000, m

  775880/5000000: episode: 9431, duration: 0.826s, episode steps: 25, steps per second: 30, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.360 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001295, mean_absolute_error: 0.388089, mean_q: -0.457498
  775905/5000000: episode: 9432, duration: 0.781s, episode steps: 25, steps per second: 32, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.280 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001850, mean_absolute_error: 0.385826, mean_q: -0.454255
  775930/5000000: episode: 9433, duration: 0.719s, episode steps: 25, steps per second: 35, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.640 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001843, mean_absolute_error: 0.387621, mean_q: -0.458834
  776014/5000000: episode: 9434, duration: 2.748s, episode steps: 84, steps per second: 31, episode reward: -1.000, me

  777568/5000000: episode: 9459, duration: 1.462s, episode steps: 51, steps per second: 35, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.686 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.002151, mean_absolute_error: 0.388462, mean_q: -0.459858
  777600/5000000: episode: 9460, duration: 0.984s, episode steps: 32, steps per second: 33, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.469 [0.000, 5.000], mean observation: 0.247 [0.000, 24.000], loss: 0.002004, mean_absolute_error: 0.392036, mean_q: -0.462071
  777640/5000000: episode: 9461, duration: 1.201s, episode steps: 40, steps per second: 33, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.475 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 0.001142, mean_absolute_error: 0.385663, mean_q: -0.459245
  777707/5000000: episode: 9462, duration: 2.106s, episode steps: 67, steps per second: 32, episode reward: -1.000, me

  778929/5000000: episode: 9487, duration: 3.199s, episode steps: 103, steps per second: 32, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.709 [0.000, 5.000], mean observation: 0.256 [0.000, 24.000], loss: 0.001804, mean_absolute_error: 0.384794, mean_q: -0.455591
  779048/5000000: episode: 9488, duration: 3.975s, episode steps: 119, steps per second: 30, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.403 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001827, mean_absolute_error: 0.386363, mean_q: -0.456080
  779073/5000000: episode: 9489, duration: 0.805s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.280 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001478, mean_absolute_error: 0.384708, mean_q: -0.455510
  779104/5000000: episode: 9490, duration: 1.001s, episode steps: 31, steps per second: 31, episode reward: -1.000, 

  780628/5000000: episode: 9515, duration: 2.409s, episode steps: 76, steps per second: 32, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.355 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 0.001277, mean_absolute_error: 0.385489, mean_q: -0.457017
  780655/5000000: episode: 9516, duration: 0.816s, episode steps: 27, steps per second: 33, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.037 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001209, mean_absolute_error: 0.382228, mean_q: -0.453921
  780682/5000000: episode: 9517, duration: 0.814s, episode steps: 27, steps per second: 33, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.704 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001542, mean_absolute_error: 0.384665, mean_q: -0.454542
  780707/5000000: episode: 9518, duration: 0.811s, episode steps: 25, steps per second: 31, episode reward: -1.000, me

  782680/5000000: episode: 9543, duration: 4.661s, episode steps: 145, steps per second: 31, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.303 [0.000, 5.000], mean observation: 0.197 [0.000, 24.000], loss: 0.001528, mean_absolute_error: 0.386509, mean_q: -0.456461
  782721/5000000: episode: 9544, duration: 1.362s, episode steps: 41, steps per second: 30, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.585 [0.000, 5.000], mean observation: 0.259 [0.000, 24.000], loss: 0.001461, mean_absolute_error: 0.384165, mean_q: -0.454959
  782940/5000000: episode: 9545, duration: 6.945s, episode steps: 219, steps per second: 32, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.548 [0.000, 5.000], mean observation: 0.301 [0.000, 24.000], loss: 0.001504, mean_absolute_error: 0.385458, mean_q: -0.456133
  782997/5000000: episode: 9546, duration: 1.824s, episode steps: 57, steps per second: 31, episode reward: -1.000, 

  784769/5000000: episode: 9571, duration: 3.947s, episode steps: 124, steps per second: 31, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.508 [0.000, 5.000], mean observation: 0.260 [0.000, 24.000], loss: 0.001823, mean_absolute_error: 0.388333, mean_q: -0.458599
  784796/5000000: episode: 9572, duration: 0.867s, episode steps: 27, steps per second: 31, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.407 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001688, mean_absolute_error: 0.391045, mean_q: -0.459123
  784891/5000000: episode: 9573, duration: 2.841s, episode steps: 95, steps per second: 33, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.726 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 0.001952, mean_absolute_error: 0.392030, mean_q: -0.462999
  784980/5000000: episode: 9574, duration: 3.019s, episode steps: 89, steps per second: 29, episode reward: -1.000, m

  786587/5000000: episode: 9599, duration: 2.369s, episode steps: 77, steps per second: 33, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.494 [0.000, 5.000], mean observation: 0.262 [0.000, 24.000], loss: 0.001521, mean_absolute_error: 0.390816, mean_q: -0.463485
  786679/5000000: episode: 9600, duration: 2.764s, episode steps: 92, steps per second: 33, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.587 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001326, mean_absolute_error: 0.391367, mean_q: -0.463282
  786705/5000000: episode: 9601, duration: 0.820s, episode steps: 26, steps per second: 32, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.077 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001557, mean_absolute_error: 0.390503, mean_q: -0.461571
  786731/5000000: episode: 9602, duration: 0.850s, episode steps: 26, steps per second: 31, episode reward: -1.000, me

  788283/5000000: episode: 9627, duration: 1.085s, episode steps: 33, steps per second: 30, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.667 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 0.000974, mean_absolute_error: 0.388240, mean_q: -0.458416
  788310/5000000: episode: 9628, duration: 0.834s, episode steps: 27, steps per second: 32, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 3.481 [1.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.002914, mean_absolute_error: 0.389331, mean_q: -0.460056
  788430/5000000: episode: 9629, duration: 3.834s, episode steps: 120, steps per second: 31, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.325 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001063, mean_absolute_error: 0.387390, mean_q: -0.459123
  788481/5000000: episode: 9630, duration: 1.636s, episode steps: 51, steps per second: 31, episode reward: -1.000, m

  790179/5000000: episode: 9655, duration: 4.178s, episode steps: 148, steps per second: 35, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.297 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 0.001747, mean_absolute_error: 0.386493, mean_q: -0.456016
  790301/5000000: episode: 9656, duration: 3.866s, episode steps: 122, steps per second: 32, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.459 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.001443, mean_absolute_error: 0.384888, mean_q: -0.455144
  790330/5000000: episode: 9657, duration: 0.934s, episode steps: 29, steps per second: 31, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.690 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001449, mean_absolute_error: 0.382447, mean_q: -0.452346
  790534/5000000: episode: 9658, duration: 6.450s, episode steps: 204, steps per second: 32, episode reward: -1.000,

  792410/5000000: episode: 9683, duration: 3.641s, episode steps: 124, steps per second: 34, episode reward: -1.000, mean reward: -0.008 [-1.000, 0.000], mean action: 2.444 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.001442, mean_absolute_error: 0.381088, mean_q: -0.449886
  792436/5000000: episode: 9684, duration: 0.793s, episode steps: 26, steps per second: 33, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.154 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001989, mean_absolute_error: 0.379960, mean_q: -0.449961
  792477/5000000: episode: 9685, duration: 1.246s, episode steps: 41, steps per second: 33, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.341 [0.000, 5.000], mean observation: 0.251 [0.000, 24.000], loss: 0.000818, mean_absolute_error: 0.379064, mean_q: -0.448920
  792534/5000000: episode: 9686, duration: 1.813s, episode steps: 57, steps per second: 31, episode reward: -1.000, m

  794085/5000000: episode: 9711, duration: 3.228s, episode steps: 101, steps per second: 31, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.327 [0.000, 5.000], mean observation: 0.257 [0.000, 24.000], loss: 0.001922, mean_absolute_error: 0.381980, mean_q: -0.449669
  794146/5000000: episode: 9712, duration: 1.829s, episode steps: 61, steps per second: 33, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.508 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001466, mean_absolute_error: 0.382127, mean_q: -0.453275
  794172/5000000: episode: 9713, duration: 0.838s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.731 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001626, mean_absolute_error: 0.381169, mean_q: -0.451578
  794242/5000000: episode: 9714, duration: 2.184s, episode steps: 70, steps per second: 32, episode reward: -1.000, m

  795801/5000000: episode: 9739, duration: 1.041s, episode steps: 30, steps per second: 29, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.433 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001998, mean_absolute_error: 0.386223, mean_q: -0.455854
  795835/5000000: episode: 9740, duration: 1.027s, episode steps: 34, steps per second: 33, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.706 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 0.000973, mean_absolute_error: 0.384004, mean_q: -0.454697
  795897/5000000: episode: 9741, duration: 1.867s, episode steps: 62, steps per second: 33, episode reward: -1.000, mean reward: -0.016 [-1.000, 0.000], mean action: 2.468 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001504, mean_absolute_error: 0.382615, mean_q: -0.453431
  795959/5000000: episode: 9742, duration: 1.987s, episode steps: 62, steps per second: 31, episode reward: -1.000, me

  797311/5000000: episode: 9767, duration: 2.651s, episode steps: 92, steps per second: 35, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.283 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001314, mean_absolute_error: 0.384807, mean_q: -0.455710
  797363/5000000: episode: 9768, duration: 1.607s, episode steps: 52, steps per second: 32, episode reward: -1.000, mean reward: -0.019 [-1.000, 0.000], mean action: 2.442 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001788, mean_absolute_error: 0.385659, mean_q: -0.453569
  797530/5000000: episode: 9769, duration: 5.699s, episode steps: 167, steps per second: 29, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.551 [0.000, 5.000], mean observation: 0.258 [0.000, 24.000], loss: 0.001749, mean_absolute_error: 0.385864, mean_q: -0.454675
  797708/5000000: episode: 9770, duration: 5.267s, episode steps: 178, steps per second: 34, episode reward: -1.000, 

  799251/5000000: episode: 9795, duration: 5.741s, episode steps: 179, steps per second: 31, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.397 [0.000, 5.000], mean observation: 0.284 [0.000, 24.000], loss: 0.001675, mean_absolute_error: 0.389993, mean_q: -0.460567
  799281/5000000: episode: 9796, duration: 0.969s, episode steps: 30, steps per second: 31, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.800 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001213, mean_absolute_error: 0.388391, mean_q: -0.461283
  799322/5000000: episode: 9797, duration: 1.365s, episode steps: 41, steps per second: 30, episode reward: -1.000, mean reward: -0.024 [-1.000, 0.000], mean action: 2.878 [0.000, 5.000], mean observation: 0.258 [0.000, 24.000], loss: 0.001654, mean_absolute_error: 0.392084, mean_q: -0.464543
  799382/5000000: episode: 9798, duration: 1.771s, episode steps: 60, steps per second: 34, episode reward: -1.000, m

  800809/5000000: episode: 9823, duration: 0.761s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.600 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.004057, mean_absolute_error: 0.388875, mean_q: -0.454680
  800842/5000000: episode: 9824, duration: 1.090s, episode steps: 33, steps per second: 30, episode reward: -1.000, mean reward: -0.030 [-1.000, 0.000], mean action: 2.212 [0.000, 5.000], mean observation: 0.248 [0.000, 24.000], loss: 0.001961, mean_absolute_error: 0.385863, mean_q: -0.452600
  800893/5000000: episode: 9825, duration: 1.539s, episode steps: 51, steps per second: 33, episode reward: -1.000, mean reward: -0.020 [-1.000, 0.000], mean action: 2.235 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001376, mean_absolute_error: 0.383201, mean_q: -0.452314
  800926/5000000: episode: 9826, duration: 1.046s, episode steps: 33, steps per second: 32, episode reward: -1.000, me

  802391/5000000: episode: 9851, duration: 2.193s, episode steps: 80, steps per second: 36, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.925 [0.000, 5.000], mean observation: 0.235 [0.000, 24.000], loss: 0.001708, mean_absolute_error: 0.389109, mean_q: -0.460530
  802581/5000000: episode: 9852, duration: 6.238s, episode steps: 190, steps per second: 30, episode reward: -1.000, mean reward: -0.005 [-1.000, 0.000], mean action: 2.521 [0.000, 5.000], mean observation: 0.293 [0.000, 24.000], loss: 0.001302, mean_absolute_error: 0.387014, mean_q: -0.458488
  802607/5000000: episode: 9853, duration: 0.739s, episode steps: 26, steps per second: 35, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.077 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.002455, mean_absolute_error: 0.388463, mean_q: -0.458992
  802633/5000000: episode: 9854, duration: 0.858s, episode steps: 26, steps per second: 30, episode reward: -1.000, m

  804009/5000000: episode: 9879, duration: 0.795s, episode steps: 26, steps per second: 33, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.923 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001430, mean_absolute_error: 0.381199, mean_q: -0.450604
  804036/5000000: episode: 9880, duration: 0.829s, episode steps: 27, steps per second: 33, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.704 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001898, mean_absolute_error: 0.383413, mean_q: -0.450898
  804125/5000000: episode: 9881, duration: 2.802s, episode steps: 89, steps per second: 32, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.517 [0.000, 5.000], mean observation: 0.330 [0.000, 24.000], loss: 0.001322, mean_absolute_error: 0.381108, mean_q: -0.450899
  804151/5000000: episode: 9882, duration: 0.835s, episode steps: 26, steps per second: 31, episode reward: -1.000, me

  805322/5000000: episode: 9907, duration: 0.941s, episode steps: 32, steps per second: 34, episode reward: -1.000, mean reward: -0.031 [-1.000, 0.000], mean action: 2.312 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001749, mean_absolute_error: 0.383544, mean_q: -0.453558
  805352/5000000: episode: 9908, duration: 0.892s, episode steps: 30, steps per second: 34, episode reward: -1.000, mean reward: -0.033 [-1.000, 0.000], mean action: 2.867 [0.000, 5.000], mean observation: 0.244 [0.000, 24.000], loss: 0.001238, mean_absolute_error: 0.383713, mean_q: -0.453873
  805412/5000000: episode: 9909, duration: 1.828s, episode steps: 60, steps per second: 33, episode reward: -1.000, mean reward: -0.017 [-1.000, 0.000], mean action: 2.533 [0.000, 5.000], mean observation: 0.245 [0.000, 24.000], loss: 0.001490, mean_absolute_error: 0.382884, mean_q: -0.451361
  805490/5000000: episode: 9910, duration: 2.311s, episode steps: 78, steps per second: 34, episode reward: -1.000, me

  806810/5000000: episode: 9935, duration: 0.891s, episode steps: 29, steps per second: 33, episode reward: -1.000, mean reward: -0.034 [-1.000, 0.000], mean action: 2.828 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.000965, mean_absolute_error: 0.388001, mean_q: -0.459754
  806835/5000000: episode: 9936, duration: 0.805s, episode steps: 25, steps per second: 31, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.680 [0.000, 5.000], mean observation: 0.237 [0.000, 24.000], loss: 0.001648, mean_absolute_error: 0.386434, mean_q: -0.459497
  806869/5000000: episode: 9937, duration: 1.131s, episode steps: 34, steps per second: 30, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.324 [0.000, 5.000], mean observation: 0.250 [0.000, 24.000], loss: 0.001480, mean_absolute_error: 0.389340, mean_q: -0.460340
  806894/5000000: episode: 9938, duration: 0.783s, episode steps: 25, steps per second: 32, episode reward: -1.000, me

  808983/5000000: episode: 9963, duration: 0.899s, episode steps: 27, steps per second: 30, episode reward: -1.000, mean reward: -0.037 [-1.000, 0.000], mean action: 2.889 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.001716, mean_absolute_error: 0.384389, mean_q: -0.453741
  809018/5000000: episode: 9964, duration: 1.185s, episode steps: 35, steps per second: 30, episode reward: -1.000, mean reward: -0.029 [-1.000, 0.000], mean action: 2.143 [0.000, 5.000], mean observation: 0.249 [0.000, 24.000], loss: 0.000940, mean_absolute_error: 0.384392, mean_q: -0.454595
  809101/5000000: episode: 9965, duration: 2.473s, episode steps: 83, steps per second: 34, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 3.120 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001678, mean_absolute_error: 0.385029, mean_q: -0.455396
  809127/5000000: episode: 9966, duration: 0.740s, episode steps: 26, steps per second: 35, episode reward: -1.000, me

  810866/5000000: episode: 9991, duration: 1.690s, episode steps: 55, steps per second: 33, episode reward: -1.000, mean reward: -0.018 [-1.000, 0.000], mean action: 2.545 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001027, mean_absolute_error: 0.380115, mean_q: -0.451211
  810969/5000000: episode: 9992, duration: 2.998s, episode steps: 103, steps per second: 34, episode reward: -1.000, mean reward: -0.010 [-1.000, 0.000], mean action: 2.505 [0.000, 5.000], mean observation: 0.295 [0.000, 24.000], loss: 0.001869, mean_absolute_error: 0.383555, mean_q: -0.453074
  810994/5000000: episode: 9993, duration: 0.859s, episode steps: 25, steps per second: 29, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 3.120 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.001128, mean_absolute_error: 0.381561, mean_q: -0.451869
  811045/5000000: episode: 9994, duration: 1.742s, episode steps: 51, steps per second: 29, episode reward: -1.000, m

  812618/5000000: episode: 10019, duration: 2.369s, episode steps: 76, steps per second: 32, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.303 [0.000, 5.000], mean observation: 0.254 [0.000, 24.000], loss: 0.001597, mean_absolute_error: 0.384393, mean_q: -0.453771
  812644/5000000: episode: 10020, duration: 0.846s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.231 [0.000, 5.000], mean observation: 0.238 [0.000, 24.000], loss: 0.001073, mean_absolute_error: 0.382639, mean_q: -0.453224
  812734/5000000: episode: 10021, duration: 2.785s, episode steps: 90, steps per second: 32, episode reward: -1.000, mean reward: -0.011 [-1.000, 0.000], mean action: 2.522 [0.000, 5.000], mean observation: 0.246 [0.000, 24.000], loss: 0.001603, mean_absolute_error: 0.383763, mean_q: -0.453599
  812818/5000000: episode: 10022, duration: 2.553s, episode steps: 84, steps per second: 33, episode reward: -1.000

  814943/5000000: episode: 10047, duration: 4.442s, episode steps: 145, steps per second: 33, episode reward: -1.000, mean reward: -0.007 [-1.000, 0.000], mean action: 2.621 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001400, mean_absolute_error: 0.386512, mean_q: -0.456901
  814968/5000000: episode: 10048, duration: 0.759s, episode steps: 25, steps per second: 33, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.520 [0.000, 5.000], mean observation: 0.236 [0.000, 24.000], loss: 0.001666, mean_absolute_error: 0.386356, mean_q: -0.456126
  814994/5000000: episode: 10049, duration: 0.845s, episode steps: 26, steps per second: 31, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 3.000 [0.000, 5.000], mean observation: 0.239 [0.000, 24.000], loss: 0.000891, mean_absolute_error: 0.382823, mean_q: -0.455913
  815026/5000000: episode: 10050, duration: 1.056s, episode steps: 32, steps per second: 30, episode reward: -1.00

  816606/5000000: episode: 10075, duration: 0.863s, episode steps: 28, steps per second: 32, episode reward: -1.000, mean reward: -0.036 [-1.000, 0.000], mean action: 2.000 [0.000, 5.000], mean observation: 0.241 [0.000, 24.000], loss: 0.001302, mean_absolute_error: 0.388833, mean_q: -0.460689
  816686/5000000: episode: 10076, duration: 2.388s, episode steps: 80, steps per second: 34, episode reward: -1.000, mean reward: -0.013 [-1.000, 0.000], mean action: 2.663 [0.000, 5.000], mean observation: 0.243 [0.000, 24.000], loss: 0.001148, mean_absolute_error: 0.386439, mean_q: -0.458274
  816861/5000000: episode: 10077, duration: 5.570s, episode steps: 175, steps per second: 31, episode reward: -1.000, mean reward: -0.006 [-1.000, 0.000], mean action: 2.611 [0.000, 5.000], mean observation: 0.242 [0.000, 24.000], loss: 0.001573, mean_absolute_error: 0.387495, mean_q: -0.458209
  816890/5000000: episode: 10078, duration: 0.920s, episode steps: 29, steps per second: 32, episode reward: -1.00

  818395/5000000: episode: 10103, duration: 0.805s, episode steps: 26, steps per second: 32, episode reward: -1.000, mean reward: -0.038 [-1.000, 0.000], mean action: 2.308 [0.000, 5.000], mean observation: 0.233 [0.000, 24.000], loss: 0.001934, mean_absolute_error: 0.388492, mean_q: -0.457490
  818434/5000000: episode: 10104, duration: 1.338s, episode steps: 39, steps per second: 29, episode reward: -1.000, mean reward: -0.026 [-1.000, 0.000], mean action: 2.692 [0.000, 5.000], mean observation: 0.257 [0.000, 24.000], loss: 0.002012, mean_absolute_error: 0.388839, mean_q: -0.457994
  818459/5000000: episode: 10105, duration: 0.835s, episode steps: 25, steps per second: 30, episode reward: -1.000, mean reward: -0.040 [-1.000, 0.000], mean action: 2.320 [0.000, 5.000], mean observation: 0.232 [0.000, 24.000], loss: 0.001104, mean_absolute_error: 0.385683, mean_q: -0.457840
  818484/5000000: episode: 10106, duration: 0.797s, episode steps: 25, steps per second: 31, episode reward: -1.000

  819829/5000000: episode: 10131, duration: 2.411s, episode steps: 83, steps per second: 34, episode reward: -1.000, mean reward: -0.012 [-1.000, 0.000], mean action: 2.518 [0.000, 5.000], mean observation: 0.240 [0.000, 24.000], loss: 0.001804, mean_absolute_error: 0.388769, mean_q: -0.460570
  819945/5000000: episode: 10132, duration: 3.742s, episode steps: 116, steps per second: 31, episode reward: -1.000, mean reward: -0.009 [-1.000, 0.000], mean action: 2.440 [0.000, 5.000], mean observation: 0.355 [0.000, 24.000], loss: 0.002135, mean_absolute_error: 0.393753, mean_q: -0.462599
  819985/5000000: episode: 10133, duration: 1.186s, episode steps: 40, steps per second: 34, episode reward: -1.000, mean reward: -0.025 [-1.000, 0.000], mean action: 2.450 [0.000, 5.000], mean observation: 0.255 [0.000, 24.000], loss: 0.002017, mean_absolute_error: 0.394663, mean_q: -0.465477
  820012/5000000: episode: 10134, duration: 0.888s, episode steps: 27, steps per second: 30, episode reward: -1.00

  821882/5000000: episode: 10159, duration: 2.026s, episode steps: 71, steps per second: 35, episode reward: -1.000, mean reward: -0.014 [-1.000, 0.000], mean action: 2.380 [0.000, 5.000], mean observation: 0.252 [0.000, 24.000], loss: 0.001970, mean_absolute_error: 0.391126, mean_q: -0.463478
done, took 25491.867 seconds


In [9]:
# Save the agent's weights to `model_path` after the training run above has
# finished; `overwrite=True` is passed so an existing file at that path is
# presumably replaced without prompting.
# NOTE(review): `model_path` is defined in the config cell as a template that
# contains a literal "{step}" placeholder, and it is passed here without
# `.format(step=...)`, so the output file name will apparently contain the
# text "{step}" verbatim -- confirm this is intended.
# NOTE(review): the template ends in ".h4"; this looks like a typo for ".h5"
# (HDF5) -- verify before relying on the extension.
dqn.save_weights(model_path, overwrite=True)