In [None]:
import gym
import gym.spaces
import enum
import numpy as np
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import ptan
import torch.optim as optim
import os
import csv
import glob
import numpy as np
import collections
from torch.optim import Adam
from d3rlpy.algos import DQN
from sklearn.model_selection import train_test_split
from d3rlpy.models.optimizers import OptimizerFactory
from d3rlpy.online.buffers import ReplayBuffer
from d3rlpy.online.explorers import LinearDecayEpsilonGreedy, ConstantEpsilonGreedy
from gym.utils import seeding
from gym.envs.registration import EnvSpec

Define helper functions that load stock price data from CSV files and convert it to prices relative to the open price.

In [None]:



# Bar data for one instrument: each field holds one value per bar
# (read_csv builds them as float32 numpy arrays).
Prices = collections.namedtuple('Prices', field_names=['open', 'high', 'low', 'close', 'volume'])


def read_csv(file_name, sep=',', filter_data=True, fix_open_price=False):
    """
    Load price bars from a CSV file with <OPEN>, <HIGH>, <LOW>, <CLOSE> and <VOL> columns.

    If the header has no '<OPEN>' column when split by ',', the file is re-read
    with ';' as the separator, keeping the caller's filter_data and
    fix_open_price settings.

    :param file_name: path of the CSV file to read
    :param sep: column delimiter
    :param filter_data: when True, skip bars whose open, high, low and close are all equal
    :param fix_open_price: when True, replace a bar's open with the previous bar's close
        if they differ, widening low/high so they still contain the new open
    :return: Prices tuple of float32 numpy arrays, one entry per kept bar
    """
    print("Reading", file_name)
    with open(file_name, 'rt', encoding='utf-8') as fd:
        reader = csv.reader(fd, delimiter=sep)
        header = next(reader)
        if '<OPEN>' not in header and sep == ',':
            # Retry with ';' and forward the options; the previous code dropped
            # them, so such files were always read with the defaults.
            return read_csv(file_name, ';', filter_data=filter_data,
                            fix_open_price=fix_open_price)
        indices = [header.index(s) for s in ('<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>', '<VOL>')]
        o, h, l, c, v = [], [], [], [], []
        count_out = 0
        count_filter = 0
        count_fixed = 0
        prev_vals = None
        for row in reader:
            vals = list(map(float, [row[idx] for idx in indices]))
            # a bar whose four prices coincide carries no price movement
            if filter_data and all(abs(price - vals[0]) < 1e-8 for price in vals[:-1]):
                count_filter += 1
                continue

            po, ph, pl, pc, pv = vals

            # fix open price for current bar to match close price for the previous bar
            if fix_open_price and prev_vals is not None:
                prev_close = prev_vals[3]
                if abs(po - prev_close) > 1e-8:
                    count_fixed += 1
                    po = prev_close
                    pl = min(pl, po)
                    ph = max(ph, po)
            count_out += 1
            o.append(po)
            c.append(pc)
            h.append(ph)
            l.append(pl)
            v.append(pv)
            prev_vals = vals
    print("Read done, got %d rows, %d filtered, %d open prices adjusted" % (
        count_filter + count_out, count_filter, count_fixed))
    return Prices(open=np.array(o, dtype=np.float32),
                  high=np.array(h, dtype=np.float32),
                  low=np.array(l, dtype=np.float32),
                  close=np.array(c, dtype=np.float32),
                  volume=np.array(v, dtype=np.float32))


def prices_to_relative(prices):
    """
    Express high, low and close as fractional offsets from each bar's open price
    :param prices: Prices tuple with absolute open, high, low, close and volume
    :return: Prices tuple with open and volume unchanged and high, low, close
        replaced by (value - open) / open
    """
    assert isinstance(prices, Prices)
    base = prices.open

    def _relative(series):
        # fractional distance of the series from the bar's open
        return (series - base) / base

    return Prices(
        open=base,
        high=_relative(prices.high),
        low=_relative(prices.low),
        close=_relative(prices.close),
        volume=prices.volume,
    )


def load_relative(csv_file):
    """Read a price CSV file and return its bars in relative-to-open form."""
    absolute_prices = read_csv(csv_file)
    return prices_to_relative(absolute_prices)


def price_files(dir_name):
    """Return a list with the path of every ``*.csv`` file directly inside dir_name."""
    pattern = os.path.join(dir_name, "*.csv")
    return list(glob.glob(pattern))


def load_year_data(year, basedir='data'):
    """
    Load every CSV in basedir whose name matches ``*_<yy>*.csv``, where <yy> is
    the last two characters of str(year).
    :param year: year used to build the file name pattern
    :param basedir: directory searched for matching files
    :return: dict mapping each matching path to its relative Prices
    """
    year_suffix = str(year)[-2:]
    pattern = os.path.join(basedir, "*_%s*.csv" % year_suffix)
    return {path: load_relative(path) for path in glob.glob(pattern)}

We encode all available actions as the members of an enumeration. We support a very simple
set of actions with only three options: do nothing, buy a single share, and close the
existing position.

In [None]:
# Default number of most recent bars included in each observation.
DEFAULT_BARS_COUNT = 10
# Default commission; State.step subtracts it from the reward on every buy and every close.
# Rewards are computed as 100.0 * relative price change, so this is on a percent scale.
DEFAULT_COMMISSION_PERC = 0.1

class Actions(enum.Enum):
    """Actions available to the agent; the values are the discrete action indices."""
    Skip = 0   # do nothing
    Buy = 1    # open a position (ignored by State.step if one is already open)
    Close = 2  # close the open position (ignored by State.step if there is none)

The `State` class implements most of the environment's functionality: it encodes the
observation, applies an action and computes the reward, and calculates the current bar's
real close price. Prices passed to the `State` class are in relative form with respect to
the open price: the high, low, and close components are ratios relative to the open price.

In [None]:
class State:
    """
    Keeps the current bar offset inside one instrument's price series together
    with the agent's position, and turns them into observations and rewards.
    """

    def __init__(self, bars_count, commission_perc, reset_on_close,
                 reward_on_close=True, volumes=True):
        """
        :param bars_count: number of most recent bars put into each observation
        :param commission_perc: amount subtracted from the reward on each buy and each close
        :param reset_on_close: when True, closing a position ends the episode
        :param reward_on_close: when True, profit is rewarded only when the position is
            closed; otherwise it is rewarded bar by bar while the position is held
        :param volumes: when truthy, each bar's volume is part of the observation
        """
        assert isinstance(bars_count, int) and bars_count > 0
        assert isinstance(commission_perc, float) and commission_perc >= 0.0
        assert isinstance(reset_on_close, bool)
        assert isinstance(reward_on_close, bool)
        self.bars_count = bars_count
        self.commission_perc = commission_perc
        self.reset_on_close = reset_on_close
        self.reward_on_close = reward_on_close
        self.volumes = volumes

    def reset(self, prices, offset):
        """
        Start a new episode on the given prices at the given bar offset, with no open position.
        """
        assert isinstance(prices, Prices)
        assert offset >= self.bars_count - 1
        self._prices, self._offset = prices, offset
        self.have_position, self.open_price = False, 0.0

    @property
    def shape(self):
        # per bar: high, low, close (+ volume); then position flag and relative profit
        per_bar = 4 if self.volumes else 3
        return (per_bar * self.bars_count + 1 + 1,)

    def encode(self):
        """
        Build the flat float32 observation for the current offset.
        """
        per_bar = 4 if self.volumes else 3
        encoded = np.empty(self.shape, dtype=np.float32)
        oldest_bar = self._offset - self.bars_count + 1
        for slot in range(self.bars_count):
            bar = oldest_bar + slot
            base = slot * per_bar
            encoded[base] = self._prices.high[bar]
            encoded[base + 1] = self._prices.low[bar]
            encoded[base + 2] = self._prices.close[bar]
            if self.volumes:
                encoded[base + 3] = self._prices.volume[bar]
        tail = self.bars_count * per_bar
        encoded[tail] = float(self.have_position)
        if self.have_position:
            # profit of the open position relative to its entry price
            encoded[tail + 1] = self._cur_close() / self.open_price - 1.0
        else:
            encoded[tail + 1] = 0.0
        return encoded

    def _cur_close(self):
        """
        Real (absolute) close price of the bar at the current offset.
        """
        bar_open = self._prices.open[self._offset]
        return bar_open * (1.0 + self._prices.close[self._offset])

    def step(self, action):
        """
        Apply the action at the current bar, then move to the next bar.
        :param action: Actions member to perform
        :return: reward, done
        """
        assert isinstance(action, Actions)
        reward = 0.0
        done = False
        price_before = self._cur_close()
        opening = action == Actions.Buy and not self.have_position
        closing = action == Actions.Close and self.have_position
        if opening:
            self.have_position = True
            self.open_price = price_before
            reward -= self.commission_perc
        elif closing:
            reward -= self.commission_perc
            done |= self.reset_on_close
            if self.reward_on_close:
                reward += 100.0 * (price_before / self.open_price - 1.0)
            self.have_position = False
            self.open_price = 0.0

        self._offset += 1
        price_after = self._cur_close()
        # the episode also ends once the last bar of the series is reached
        done |= self._offset >= self._prices.close.shape[0] - 1

        if self.have_position and not self.reward_on_close:
            reward += 100.0 * (price_after / price_before - 1.0)

        return reward, done

In [None]:
class State1D(State):
    """
    State whose observation is a 2D array (channels, bars), suitable for 1D convolution
    """

    @property
    def shape(self):
        # channels: high, low, close (+ volume), position flag, relative profit
        channels = 6 if self.volumes else 5
        return (channels, self.bars_count)

    def encode(self):
        """
        Build the (channels, bars_count) float32 observation for the current offset.
        """
        window = slice(self._offset - (self.bars_count - 1), self._offset + 1)
        encoded = np.zeros(shape=self.shape, dtype=np.float32)
        series_rows = [self._prices.high, self._prices.low, self._prices.close]
        if self.volumes:
            series_rows.append(self._prices.volume)
        for row_idx, series in enumerate(series_rows):
            encoded[row_idx] = series[window]
        if self.have_position:
            # the two rows after the price rows stay zero unless a position is open
            flag_row = len(series_rows)
            encoded[flag_row] = 1.0
            encoded[flag_row + 1] = self._cur_close() / self.open_price - 1.0
        return encoded

# Trading env

In [None]:
class StocksEnv(gym.Env):
    """
    Gym environment in which the agent trades one share of a randomly chosen instrument.

    Observations come from State (flat vector) or State1D (channels x bars);
    actions are the indices of the Actions enum.
    """
    metadata = {"render.modes": ["human"]}
    spec = EnvSpec("StocksEnv-v0")

    def __init__(
        self,
        prices,
        bars_count=DEFAULT_BARS_COUNT,
        commission=DEFAULT_COMMISSION_PERC,
        reset_on_close=True,
        state_1d=False,
        random_ofs_on_reset=True,
        reward_on_close=False,
        volumes=False,
    ):
        """
        :param prices: dict mapping instrument name to its Prices (relative form)
        :param bars_count: number of bars in each observation
        :param commission: commission passed to the state as commission_perc
        :param reset_on_close: when True, closing a position ends the episode
        :param state_1d: when truthy, use State1D instead of State
        :param random_ofs_on_reset: when truthy, start each episode at a random offset
        :param reward_on_close: passed through to the state
        :param volumes: passed through to the state
        """
        assert isinstance(prices, dict)
        self._prices = prices
        # both state classes take the same constructor arguments
        state_cls = State1D if state_1d else State
        self._state = state_cls(
            bars_count,
            commission,
            reset_on_close,
            reward_on_close=reward_on_close,
            volumes=volumes,
        )
        self.action_space = gym.spaces.Discrete(n=len(Actions))
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=self._state.shape, dtype=np.float32
        )
        self.random_ofs_on_reset = random_ofs_on_reset
        self.seed()

    def reset(self):
        """
        Pick an instrument and a starting offset, reset the state, and return the first observation.
        """
        # make selection of the instrument and its offset. Then reset the state
        self._instrument = self.np_random.choice(list(self._prices.keys()))
        prices = self._prices[self._instrument]
        bars = self._state.bars_count
        if self.random_ofs_on_reset:
            # random start that leaves a margin of bars at both ends of the series
            offset = self.np_random.choice(prices.high.shape[0] - bars * 10) + bars
        else:
            offset = bars
        self._state.reset(prices, offset)
        return self._state.encode()

    def step(self, action_idx):
        """
        Perform the action with the given index.
        :return: observation, reward, done, info (instrument name and current offset)
        """
        action = Actions(action_idx)
        reward, done = self._state.step(action)
        obs = self._state.encode()
        info = {"instrument": self._instrument, "offset": self._state._offset}
        return obs, reward, done, info

    def render(self, mode="human", close=False):
        """Rendering is not implemented; this is a no-op."""
        pass

    def close(self):
        """Nothing to release; this is a no-op."""
        pass

    def seed(self, seed=None):
        """
        (Re)create the environment's random generator and return the seeds in use.
        """
        self.np_random, seed1 = seeding.np_random(seed)
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        return [seed1, seed2]

    @classmethod
    def from_dir(cls, data_dir, **kwargs):
        """
        Build an environment from every ``*.csv`` file in data_dir.
        :param data_dir: directory containing the price CSV files
        :param kwargs: forwarded to the constructor
        :return: environment whose instruments are keyed by file path
        """
        # load_relative and price_files are defined in this notebook; the former
        # `data.` prefix pointed at a module that is never imported here.
        prices = {file: load_relative(file) for file in price_files(data_dir)}
        # use cls so subclasses get an instance of their own type
        return cls(prices, **kwargs)

# Model

We use the pre-built algorithms from d3rlpy; here we use its `DQN` class.

In [None]:
# Paths of the price CSV files used for training and for validation.
data_path = "./YNDX_160101_161231.csv"
val_path = "./YNDX_150101_151231.csv"

# StocksEnv expects a dict mapping instrument name -> Prices in relative form.
stock_data = {"YNDX": load_relative(data_path)}
val_data = {"YNDX": load_relative(val_path)}

Reading ./YNDX_160101_161231.csv
Read done, got 131542 rows, 99752 filtered, 0 open prices adjusted
Reading ./YNDX_150101_151231.csv
Read done, got 130566 rows, 104412 filtered, 0 open prices adjusted


Set up two environments: one for training and one for validation.

In [None]:
# train env
# BARS_COUNT: number of most recent bars included in each observation
BARS_COUNT = 10
env = StocksEnv(stock_data, bars_count=BARS_COUNT)

In [None]:
# Cap training episodes at 1000 steps.
# NOTE: this rebinds `env`, so re-running this cell wraps the env in TimeLimit again.
env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)
# validation env (built on the validation data, not wrapped in TimeLimit)
env_val = StocksEnv(val_data, bars_count=BARS_COUNT)

# Online Training

Set up the replay buffer and the exploration strategy.

In [None]:
# replay buffer with a maximum length of 1,000,000, created for the training env
buffer = ReplayBuffer(maxlen= 1000000, env= env)
# create the epsilon-greedy explorer: epsilon starts at 1.0 and ends at 0.1,
# with a decay duration of 100000
# (constant-epsilon alternative: ConstantEpsilonGreedy(0.3))
explorer = LinearDecayEpsilonGreedy(start_epsilon=1.0,
                                    end_epsilon=0.1,
                                    duration=100000)

Initialize the DQN algorithm and launch training.

In [None]:
# Adam optimizer factory with a modified weight decay.
# NOTE: the DQN constructor in this notebook has its optim_factory argument commented out,
# so this factory is currently unused.
optim_factory = OptimizerFactory(Adam, weight_decay=1e-4)

In [None]:
# DQN configured with the settings below; the custom optimizer factory is left disabled.
dqn = DQN(
    batch_size=32,
    learning_rate=2.5e-4,
    target_update_interval=100,
    #optim_factory= optim_factory
)

In [None]:
# Flat observation without volumes: 3 values per bar * 10 bars + position flag + relative profit = 32
(env.observation_space.shape)

(32,)

In [None]:
# Instantiate the DQN implementation from the environment
# (presumably using its observation shape and action size - confirm in the d3rlpy docs).
dqn.build_with_env(env)

In [None]:
from d3rlpy.metrics.scorer import td_error_scorer
from d3rlpy.metrics.scorer import average_value_estimation_scorer
from d3rlpy.metrics.scorer import evaluate_on_environment

In [None]:
# NOTE(review): evaluate_scorer is not referenced by the fit_online call in this notebook,
# which receives env_val through eval_env instead.
evaluate_scorer = evaluate_on_environment(env)

In [None]:
# Online training on `env` with the replay buffer and epsilon-greedy explorer created above.
dqn.fit_online(
    env,
    buffer,
    explorer,
    n_steps=100000,  # train for 100K steps
    eval_env= env_val,  # environment passed for evaluation (validation data)
    n_steps_per_epoch=100,  # evaluation is performed every 100 steps
    update_start_step=100,  # parameter update starts after 100 steps
    eval_epsilon=0.3,  # presumably the epsilon used during evaluation rollouts - confirm in d3rlpy docs
    save_metrics= True,
    tensorboard_dir= 'runs'
)

2023-01-10 00:15.34 [info     ] Directory is created at d3rlpy_logs/DQN_online_20230110001534
2023-01-10 00:15.34 [info     ] Parameters are saved to d3rlpy_logs/DQN_online_20230110001534/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.00025, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 'target_update_interval': 100, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (32,), 'action_size': 3}


  0%|          | 0/100000 [00:00<?, ?it/s]

2023-01-10 00:15.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_100.pt
2023-01-10 00:15.34 [info     ] DQN_online_20230110001534: epoch=1 step=100 epoch=1 metrics={'time_inference': 0.00025288820266723634, 'time_environment_step': 2.2585391998291017e-05, 'time_step': 0.00029983282089233396, 'rollout_return': -0.31152872342192617, 'evaluation': 0.2991823133979478} step=100
2023-01-10 00:15.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_200.pt
2023-01-10 00:15.34 [info     ] DQN_online_20230110001534: epoch=2 step=200 epoch=2 metrics={'time_inference': 0.00026726245880126954, 'time_environment_step': 2.9616355895996093e-05, 'time_sample_batch': 5.3038597106933596e-05, 'time_algorithm_update': 0.001407921314239502, 'loss': 0.01144238221924752, 'time_step': 0.0017974019050598145, 'rollout_return': -0.31639709899078755, 'evaluation': -0.2736961954494065} step=200
2023-01-10 00:15.34 [info     ] Model paramet

2023-01-10 00:15.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_1700.pt
2023-01-10 00:15.38 [info     ] DQN_online_20230110001534: epoch=17 step=1700 epoch=17 metrics={'time_inference': 0.0002759885787963867, 'time_environment_step': 3.0465126037597657e-05, 'time_sample_batch': 6.654739379882813e-05, 'time_algorithm_update': 0.001603410243988037, 'loss': 0.015562230846844613, 'time_step': 0.0020165419578552246, 'rollout_return': -0.2119316450908426, 'evaluation': -0.297359922366057} step=1700
2023-01-10 00:15.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_1800.pt
2023-01-10 00:15.38 [info     ] DQN_online_20230110001534: epoch=18 step=1800 epoch=18 metrics={'time_inference': 0.0003447437286376953, 'time_environment_step': 4.114389419555664e-05, 'time_sample_batch': 8.385419845581054e-05, 'time_algorithm_update': 0.0018523216247558593, 'loss': 0.018637413321994246, 'time_step': 0.0023844766616821288, 'ro

2023-01-10 00:15.41 [info     ] DQN_online_20230110001534: epoch=32 step=3200 epoch=32 metrics={'time_inference': 0.0002588963508605957, 'time_environment_step': 2.956390380859375e-05, 'time_sample_batch': 6.735801696777343e-05, 'time_algorithm_update': 0.001593327522277832, 'loss': 0.01707541592884809, 'time_step': 0.001987147331237793, 'rollout_return': -0.252488391701142, 'evaluation': -0.3616412690087865} step=3200
2023-01-10 00:15.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_3300.pt
2023-01-10 00:15.41 [info     ] DQN_online_20230110001534: epoch=33 step=3300 epoch=33 metrics={'time_inference': 0.0002767658233642578, 'time_environment_step': 3.0925273895263674e-05, 'time_sample_batch': 6.908893585205077e-05, 'time_algorithm_update': 0.0016211938858032226, 'loss': 0.016783953569829466, 'time_step': 0.002034780979156494, 'rollout_return': -0.03699126754431607, 'evaluation': -0.301390354672235} step=3300
2023-01-10 00:15.42 [info     ] Mode

2023-01-10 00:15.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_4800.pt
2023-01-10 00:15.45 [info     ] DQN_online_20230110001534: epoch=48 step=4800 epoch=48 metrics={'time_inference': 0.00024962425231933594, 'time_environment_step': 2.7930736541748048e-05, 'time_sample_batch': 5.9909820556640625e-05, 'time_algorithm_update': 0.001503114700317383, 'loss': 0.01401442687259987, 'time_step': 0.001878976821899414, 'rollout_return': -0.06542280808257249, 'evaluation': -0.07410608973192997} step=4800
2023-01-10 00:15.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_4900.pt
2023-01-10 00:15.45 [info     ] DQN_online_20230110001534: epoch=49 step=4900 epoch=49 metrics={'time_inference': 0.00025389432907104494, 'time_environment_step': 2.819061279296875e-05, 'rollout_return': -0.23063885918819174, 'time_sample_batch': 6.179332733154297e-05, 'time_algorithm_update': 0.0014842438697814941, 'loss': 0.014831919132266

2023-01-10 00:15.48 [info     ] DQN_online_20230110001534: epoch=63 step=6300 epoch=63 metrics={'time_inference': 0.00026466131210327147, 'time_environment_step': 2.9659271240234376e-05, 'time_sample_batch': 6.873130798339843e-05, 'time_algorithm_update': 0.0015426230430603026, 'loss': 0.01666675295913592, 'time_step': 0.0019467782974243164, 'rollout_return': -0.25639756238630185, 'evaluation': -0.36807530147968215} step=6300
2023-01-10 00:15.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_6400.pt
2023-01-10 00:15.49 [info     ] DQN_online_20230110001534: epoch=64 step=6400 epoch=64 metrics={'time_inference': 0.00025969982147216797, 'time_environment_step': 2.8917789459228517e-05, 'time_sample_batch': 6.36744499206543e-05, 'time_algorithm_update': 0.0015999650955200195, 'loss': 0.014159160642884671, 'time_step': 0.001991136074066162, 'rollout_return': -0.24389019168059425, 'evaluation': -0.36606076518850816} step=6400
2023-01-10 00:15.49 [info  

2023-01-10 00:15.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_7900.pt
2023-01-10 00:15.52 [info     ] DQN_online_20230110001534: epoch=79 step=7900 epoch=79 metrics={'time_inference': 0.00024982452392578123, 'time_environment_step': 2.8727054595947264e-05, 'time_sample_batch': 6.180047988891601e-05, 'time_algorithm_update': 0.0015094876289367676, 'loss': 0.01409641140839085, 'time_step': 0.0018905067443847655, 'rollout_return': -0.2896756734420345, 'evaluation': 0.07045626815421771} step=7900
2023-01-10 00:15.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_8000.pt
2023-01-10 00:15.52 [info     ] DQN_online_20230110001534: epoch=80 step=8000 epoch=80 metrics={'time_inference': 0.00024870872497558596, 'time_environment_step': 2.765178680419922e-05, 'time_sample_batch': 6.117820739746094e-05, 'time_algorithm_update': 0.001500844955444336, 'loss': 0.01272684388095513, 'time_step': 0.0018782925605773926, 'r

2023-01-10 00:15.56 [info     ] DQN_online_20230110001534: epoch=94 step=9400 epoch=94 metrics={'time_inference': 0.0002692079544067383, 'time_environment_step': 3.0214786529541017e-05, 'time_sample_batch': 6.604909896850585e-05, 'time_algorithm_update': 0.0015251755714416504, 'loss': 0.012755600172095, 'time_step': 0.0019317793846130372, 'rollout_return': -0.21891946221360695, 'evaluation': -0.02271675255117834} step=9400
2023-01-10 00:15.56 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_9500.pt
2023-01-10 00:15.56 [info     ] DQN_online_20230110001534: epoch=95 step=9500 epoch=95 metrics={'time_inference': 0.0002657127380371094, 'time_environment_step': 3.041982650756836e-05, 'rollout_return': -0.18491323441126858, 'time_sample_batch': 6.911277770996094e-05, 'time_algorithm_update': 0.001531672477722168, 'loss': 0.015037320100236683, 'time_step': 0.00194014310836792, 'evaluation': 0.129180732749742} step=9500
2023-01-10 00:15.56 [info     ] Mode

2023-01-10 00:16.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_11000.pt
2023-01-10 00:16.00 [info     ] DQN_online_20230110001534: epoch=110 step=11000 epoch=110 metrics={'time_inference': 0.00028449535369873047, 'time_environment_step': 3.183126449584961e-05, 'time_sample_batch': 7.389307022094727e-05, 'time_algorithm_update': 0.0016296863555908202, 'loss': 0.014206030033528804, 'time_step': 0.0020633935928344727, 'rollout_return': -0.1806627991810138, 'evaluation': -0.02251334005797561} step=11000
2023-01-10 00:16.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_11100.pt
2023-01-10 00:16.00 [info     ] DQN_online_20230110001534: epoch=111 step=11100 epoch=111 metrics={'time_inference': 0.00027382850646972656, 'time_environment_step': 3.039836883544922e-05, 'time_sample_batch': 6.667375564575195e-05, 'time_algorithm_update': 0.0015428709983825685, 'loss': 0.012491681409301236, 'time_step': 0.00195393085

2023-01-10 00:16.03 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_12500.pt
2023-01-10 00:16.03 [info     ] DQN_online_20230110001534: epoch=125 step=12500 epoch=125 metrics={'time_inference': 0.00027103662490844725, 'time_environment_step': 3.111124038696289e-05, 'time_sample_batch': 7.402658462524414e-05, 'time_algorithm_update': 0.0016695189476013184, 'loss': 0.013098010586109012, 'time_step': 0.0020865440368652345, 'rollout_return': -0.22865894858208002, 'evaluation': 0.03148167909143533} step=12500
2023-01-10 00:16.03 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_12600.pt
2023-01-10 00:16.03 [info     ] DQN_online_20230110001534: epoch=126 step=12600 epoch=126 metrics={'time_inference': 0.00024648189544677733, 'time_environment_step': 2.729654312133789e-05, 'rollout_return': -0.14019448207726917, 'time_sample_batch': 6.068944931030274e-05, 'time_algorithm_update': 0.001438314914703369, 'loss': 0.0106155

2023-01-10 00:16.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_14000.pt
2023-01-10 00:16.07 [info     ] DQN_online_20230110001534: epoch=140 step=14000 epoch=140 metrics={'time_inference': 0.0002392268180847168, 'time_environment_step': 2.6311874389648437e-05, 'time_sample_batch': 5.9094429016113285e-05, 'time_algorithm_update': 0.0014008140563964843, 'loss': 0.011856800842797383, 'time_step': 0.001761472225189209, 'rollout_return': -0.1564861919966344, 'evaluation': -0.27351567587905895} step=14000
2023-01-10 00:16.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_14100.pt
2023-01-10 00:16.07 [info     ] DQN_online_20230110001534: epoch=141 step=14100 epoch=141 metrics={'time_inference': 0.00023845195770263672, 'time_environment_step': 2.652168273925781e-05, 'time_sample_batch': 5.8758258819580075e-05, 'time_algorithm_update': 0.0014121055603027344, 'loss': 0.01165305609581992, 'time_step': 0.00177165508

2023-01-10 00:16.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_15500.pt
2023-01-10 00:16.10 [info     ] DQN_online_20230110001534: epoch=155 step=15500 epoch=155 metrics={'time_inference': 0.00022893428802490235, 'time_environment_step': 2.4728775024414062e-05, 'time_sample_batch': 5.653858184814453e-05, 'time_algorithm_update': 0.001379866600036621, 'loss': 0.01060905006946996, 'time_step': 0.0017229175567626952, 'rollout_return': -0.18505712247506884, 'evaluation': -0.06107760250684582} step=15500
2023-01-10 00:16.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_15600.pt
2023-01-10 00:16.10 [info     ] DQN_online_20230110001534: epoch=156 step=15600 epoch=156 metrics={'time_inference': 0.00026319026947021487, 'time_environment_step': 2.9671192169189454e-05, 'time_sample_batch': 7.093906402587891e-05, 'time_algorithm_update': 0.0016151022911071776, 'loss': 0.012830084704328329, 'time_step': 0.0020171642

2023-01-10 00:16.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_17000.pt
2023-01-10 00:16.13 [info     ] DQN_online_20230110001534: epoch=170 step=17000 epoch=170 metrics={'time_inference': 0.00026715755462646484, 'time_environment_step': 3.119468688964844e-05, 'rollout_return': 0.02351175098425907, 'time_sample_batch': 7.57598876953125e-05, 'time_algorithm_update': 0.0016295242309570313, 'loss': 0.010451548586133867, 'time_step': 0.0020455455780029295, 'evaluation': -0.029730786357361268} step=17000
2023-01-10 00:16.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_17100.pt
2023-01-10 00:16.13 [info     ] DQN_online_20230110001534: epoch=171 step=17100 epoch=171 metrics={'time_inference': 0.00022692203521728515, 'time_environment_step': 2.5119781494140625e-05, 'time_sample_batch': 5.678892135620117e-05, 'time_algorithm_update': 0.0013648724555969237, 'loss': 0.013679921489092521, 'time_step': 0.0017068862

2023-01-10 00:16.16 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_18500.pt
2023-01-10 00:16.16 [info     ] DQN_online_20230110001534: epoch=185 step=18500 epoch=185 metrics={'time_inference': 0.00026712179183959963, 'time_environment_step': 3.091812133789063e-05, 'time_sample_batch': 7.500886917114258e-05, 'time_algorithm_update': 0.0016307497024536132, 'loss': 0.012396960643818601, 'time_step': 0.0020435047149658203, 'rollout_return': -0.30062004841969636, 'evaluation': -0.053013460411974564} step=18500
2023-01-10 00:16.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_18600.pt
2023-01-10 00:16.17 [info     ] DQN_online_20230110001534: epoch=186 step=18600 epoch=186 metrics={'time_inference': 0.0002674984931945801, 'time_environment_step': 3.0581951141357424e-05, 'time_sample_batch': 7.361412048339844e-05, 'time_algorithm_update': 0.001617288589477539, 'loss': 0.012876597903668881, 'time_step': 0.0020296287

2023-01-10 00:16.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_20000.pt
2023-01-10 00:16.20 [info     ] DQN_online_20230110001534: epoch=200 step=20000 epoch=200 metrics={'time_inference': 0.00023357391357421874, 'time_environment_step': 2.4662017822265624e-05, 'time_sample_batch': 5.8515071868896486e-05, 'time_algorithm_update': 0.0013915443420410157, 'loss': 0.010499783044215292, 'time_step': 0.0017430591583251953, 'rollout_return': -0.2531427193126743, 'evaluation': 0.2289907721031849} step=20000
2023-01-10 00:16.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_20100.pt
2023-01-10 00:16.20 [info     ] DQN_online_20230110001534: epoch=201 step=20100 epoch=201 metrics={'time_inference': 0.0002890968322753906, 'time_environment_step': 3.163576126098633e-05, 'time_sample_batch': 7.734060287475586e-05, 'time_algorithm_update': 0.0016087031364440919, 'loss': 0.012167995921336114, 'time_step': 0.002045733928

2023-01-10 00:16.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_21500.pt
2023-01-10 00:16.23 [info     ] DQN_online_20230110001534: epoch=215 step=21500 epoch=215 metrics={'time_inference': 0.0002413201332092285, 'time_environment_step': 2.5017261505126955e-05, 'time_sample_batch': 5.7866573333740236e-05, 'time_algorithm_update': 0.0014067816734313965, 'loss': 0.013745948765426874, 'time_step': 0.0017661905288696288, 'rollout_return': -0.13766708708582154, 'evaluation': -0.22693859013475076} step=21500
2023-01-10 00:16.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_21600.pt
2023-01-10 00:16.23 [info     ] DQN_online_20230110001534: epoch=216 step=21600 epoch=216 metrics={'time_inference': 0.00024997949600219727, 'time_environment_step': 2.8171539306640624e-05, 'time_sample_batch': 6.673097610473632e-05, 'time_algorithm_update': 0.0015276956558227539, 'loss': 0.011283382271649316, 'time_step': 0.00190647

2023-01-10 00:16.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_23000.pt
2023-01-10 00:16.26 [info     ] DQN_online_20230110001534: epoch=230 step=23000 epoch=230 metrics={'time_inference': 0.0002253842353820801, 'time_environment_step': 2.5525093078613282e-05, 'time_sample_batch': 5.778312683105469e-05, 'time_algorithm_update': 0.0013759636878967285, 'loss': 0.01290721251629293, 'time_step': 0.0017183971405029296, 'rollout_return': -0.21110997406492676, 'evaluation': -0.13019048032289465} step=23000
2023-01-10 00:16.27 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_23100.pt
2023-01-10 00:16.27 [info     ] DQN_online_20230110001534: epoch=231 step=23100 epoch=231 metrics={'time_inference': 0.00024141788482666017, 'time_environment_step': 2.5279521942138672e-05, 'time_sample_batch': 5.984306335449219e-05, 'time_algorithm_update': 0.0014361333847045898, 'loss': 0.01281100300140679, 'time_step': 0.00179595947

2023-01-10 00:16.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_24500.pt
2023-01-10 00:16.30 [info     ] DQN_online_20230110001534: epoch=245 step=24500 epoch=245 metrics={'time_inference': 0.00024082183837890626, 'time_environment_step': 2.5951862335205077e-05, 'rollout_return': -0.3281064540823278, 'time_sample_batch': 5.8896541595458985e-05, 'time_algorithm_update': 0.001396477222442627, 'loss': 0.01101426244014874, 'time_step': 0.0017576479911804198, 'evaluation': -0.10977107614177561} step=24500
2023-01-10 00:16.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_24600.pt
2023-01-10 00:16.30 [info     ] DQN_online_20230110001534: epoch=246 step=24600 epoch=246 metrics={'time_inference': 0.0002398371696472168, 'time_environment_step': 2.642631530761719e-05, 'time_sample_batch': 5.911827087402344e-05, 'time_algorithm_update': 0.0013903141021728515, 'loss': 0.0092004724428989, 'time_step': 0.00175071954727

2023-01-10 00:16.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_26000.pt
2023-01-10 00:16.33 [info     ] DQN_online_20230110001534: epoch=260 step=26000 epoch=260 metrics={'time_inference': 0.00023948192596435548, 'time_environment_step': 2.6066303253173828e-05, 'time_sample_batch': 6.0076713562011716e-05, 'time_algorithm_update': 0.0014085125923156737, 'loss': 0.010134948301129043, 'time_step': 0.0017720937728881836, 'rollout_return': -0.23254120939850242, 'evaluation': -0.3042753777067185} step=26000
2023-01-10 00:16.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_26100.pt
2023-01-10 00:16.33 [info     ] DQN_online_20230110001534: epoch=261 step=26100 epoch=261 metrics={'time_inference': 0.00024105548858642577, 'time_environment_step': 2.5954246520996094e-05, 'time_sample_batch': 6.148576736450195e-05, 'time_algorithm_update': 0.001437222957611084, 'loss': 0.00863392758066766, 'time_step': 0.0018013596

2023-01-10 00:16.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_27500.pt
2023-01-10 00:16.36 [info     ] DQN_online_20230110001534: epoch=275 step=27500 epoch=275 metrics={'time_inference': 0.00024033308029174804, 'time_environment_step': 2.6204586029052734e-05, 'time_sample_batch': 6.252288818359375e-05, 'time_algorithm_update': 0.0014505553245544433, 'loss': 0.010273163085803389, 'time_step': 0.0018139052391052245, 'rollout_return': -0.1456502391612322, 'evaluation': -0.8558215040196346} step=27500
2023-01-10 00:16.37 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_27600.pt
2023-01-10 00:16.37 [info     ] DQN_online_20230110001534: epoch=276 step=27600 epoch=276 metrics={'time_inference': 0.000281369686126709, 'time_environment_step': 3.1447410583496095e-05, 'time_sample_batch': 7.478713989257813e-05, 'time_algorithm_update': 0.00164778470993042, 'loss': 0.01113382630283013, 'time_step': 0.002075262069702

2023-01-10 00:16.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_29000.pt
2023-01-10 00:16.40 [info     ] DQN_online_20230110001534: epoch=290 step=29000 epoch=290 metrics={'time_inference': 0.00023415803909301758, 'time_environment_step': 2.5615692138671874e-05, 'time_sample_batch': 6.125450134277344e-05, 'time_algorithm_update': 0.0014118456840515137, 'loss': 0.011964089579414576, 'time_step': 0.0017660355567932129, 'rollout_return': -0.30025575818279093, 'evaluation': -0.01991193316813724} step=29000
2023-01-10 00:16.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_29100.pt
2023-01-10 00:16.40 [info     ] DQN_online_20230110001534: epoch=291 step=29100 epoch=291 metrics={'time_inference': 0.00023768901824951173, 'time_environment_step': 2.6640892028808593e-05, 'time_sample_batch': 6.115198135375977e-05, 'time_algorithm_update': 0.001423313617706299, 'loss': 0.011659931930480524, 'time_step': 0.001782810

2023-01-10 00:16.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_30500.pt
2023-01-10 00:16.43 [info     ] DQN_online_20230110001534: epoch=305 step=30500 epoch=305 metrics={'time_inference': 0.0002418661117553711, 'time_environment_step': 2.6938915252685546e-05, 'time_sample_batch': 6.253004074096679e-05, 'time_algorithm_update': 0.0014257287979125976, 'loss': 0.011971268348861485, 'time_step': 0.001793496608734131, 'rollout_return': -0.28822131344232343, 'evaluation': 0.1726679116019215} step=30500
2023-01-10 00:16.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_30600.pt
2023-01-10 00:16.43 [info     ] DQN_online_20230110001534: epoch=306 step=30600 epoch=306 metrics={'time_inference': 0.00025487899780273437, 'time_environment_step': 2.824068069458008e-05, 'time_sample_batch': 6.479978561401368e-05, 'time_algorithm_update': 0.0015072965621948243, 'loss': 0.010153105234494433, 'time_step': 0.0018920588493

2023-01-10 00:16.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_32000.pt
2023-01-10 00:16.46 [info     ] DQN_online_20230110001534: epoch=320 step=32000 epoch=320 metrics={'time_inference': 0.00024331092834472656, 'time_environment_step': 2.6617050170898438e-05, 'time_sample_batch': 6.213188171386719e-05, 'time_algorithm_update': 0.0014706587791442872, 'loss': 0.010301249106414617, 'time_step': 0.001841108798980713, 'rollout_return': -0.2555591595709961, 'evaluation': 0.4140816622591073} step=32000
2023-01-10 00:16.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_32100.pt
2023-01-10 00:16.46 [info     ] DQN_online_20230110001534: epoch=321 step=32100 epoch=321 metrics={'time_inference': 0.00024850130081176757, 'time_environment_step': 2.6624202728271485e-05, 'time_sample_batch': 6.140708923339844e-05, 'time_algorithm_update': 0.0014577269554138183, 'loss': 0.013605057346867398, 'time_step': 0.001829197406

2023-01-10 00:16.50 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_33500.pt
2023-01-10 00:16.50 [info     ] DQN_online_20230110001534: epoch=335 step=33500 epoch=335 metrics={'time_inference': 0.00024843692779541015, 'time_environment_step': 2.6874542236328125e-05, 'time_sample_batch': 6.289243698120118e-05, 'time_algorithm_update': 0.0014632654190063477, 'loss': 0.00967352295992896, 'time_step': 0.001837007999420166, 'rollout_return': -0.32985970226696665, 'evaluation': 0.10295144776181449} step=33500
2023-01-10 00:16.50 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_33600.pt
2023-01-10 00:16.50 [info     ] DQN_online_20230110001534: epoch=336 step=33600 epoch=336 metrics={'time_inference': 0.00025071144104003905, 'time_environment_step': 2.742767333984375e-05, 'rollout_return': -0.20060480292660052, 'time_sample_batch': 6.617307662963867e-05, 'time_algorithm_update': 0.0015464305877685546, 'loss': 0.0119398

2023-01-10 00:16.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_35000.pt
2023-01-10 00:16.53 [info     ] DQN_online_20230110001534: epoch=350 step=35000 epoch=350 metrics={'time_inference': 0.0002655482292175293, 'time_environment_step': 3.0469894409179687e-05, 'time_sample_batch': 7.35926628112793e-05, 'time_algorithm_update': 0.0016478848457336425, 'loss': 0.014272362312767654, 'time_step': 0.0020546817779541014, 'rollout_return': -0.3276970482391585, 'evaluation': -0.05828074734793502} step=35000
2023-01-10 00:16.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_35100.pt
2023-01-10 00:16.53 [info     ] DQN_online_20230110001534: epoch=351 step=35100 epoch=351 metrics={'time_inference': 0.00024510860443115233, 'time_environment_step': 2.742767333984375e-05, 'time_sample_batch': 6.504058837890625e-05, 'time_algorithm_update': 0.001471550464630127, 'loss': 0.01220843062037602, 'time_step': 0.00184408664703

2023-01-10 00:16.56 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_36500.pt
2023-01-10 00:16.56 [info     ] DQN_online_20230110001534: epoch=365 step=36500 epoch=365 metrics={'time_inference': 0.0002699828147888184, 'time_environment_step': 3.217458724975586e-05, 'time_sample_batch': 7.663011550903321e-05, 'time_algorithm_update': 0.0016317415237426759, 'loss': 0.010417315931990743, 'time_step': 0.0020519447326660155, 'rollout_return': -0.16170368960881623, 'evaluation': -0.06269103072897174} step=36500
2023-01-10 00:16.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_36600.pt
2023-01-10 00:16.57 [info     ] DQN_online_20230110001534: epoch=366 step=36600 epoch=366 metrics={'time_inference': 0.00027746200561523437, 'time_environment_step': 3.158092498779297e-05, 'time_sample_batch': 8.014678955078126e-05, 'time_algorithm_update': 0.0017076086997985839, 'loss': 0.010250715394504369, 'time_step': 0.00213599443

2023-01-10 00:17.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_38000.pt
2023-01-10 00:17.00 [info     ] DQN_online_20230110001534: epoch=380 step=38000 epoch=380 metrics={'time_inference': 0.00024671554565429686, 'time_environment_step': 2.8288364410400392e-05, 'rollout_return': -0.3858010683395121, 'time_sample_batch': 6.93964958190918e-05, 'time_algorithm_update': 0.0015898871421813965, 'loss': 0.011842756671831012, 'time_step': 0.0019703984260559084, 'evaluation': -0.26489225601054694} step=38000
2023-01-10 00:17.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_38100.pt
2023-01-10 00:17.00 [info     ] DQN_online_20230110001534: epoch=381 step=38100 epoch=381 metrics={'time_inference': 0.0002580857276916504, 'time_environment_step': 2.7043819427490236e-05, 'time_sample_batch': 6.269216537475586e-05, 'time_algorithm_update': 0.0014285016059875488, 'loss': 0.01109354340704158, 'time_step': 0.001810872554

2023-01-10 00:17.03 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_39500.pt
2023-01-10 00:17.03 [info     ] DQN_online_20230110001534: epoch=395 step=39500 epoch=395 metrics={'time_inference': 0.0002747821807861328, 'time_environment_step': 3.172159194946289e-05, 'time_sample_batch': 7.778406143188477e-05, 'time_algorithm_update': 0.0016872620582580567, 'loss': 0.01271461004158482, 'time_step': 0.0021124672889709472, 'rollout_return': -0.13782528626667842, 'evaluation': -0.21487437773253443} step=39500
2023-01-10 00:17.03 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_39600.pt
2023-01-10 00:17.03 [info     ] DQN_online_20230110001534: epoch=396 step=39600 epoch=396 metrics={'time_inference': 0.00025805473327636716, 'time_environment_step': 2.9420852661132814e-05, 'time_sample_batch': 7.090806961059571e-05, 'time_algorithm_update': 0.0016015028953552245, 'loss': 0.00945506250136532, 'time_step': 0.001998543739

2023-01-10 00:17.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_41000.pt
2023-01-10 00:17.06 [info     ] DQN_online_20230110001534: epoch=410 step=41000 epoch=410 metrics={'time_inference': 0.0002470135688781738, 'time_environment_step': 2.6459693908691407e-05, 'rollout_return': -0.1684124383221739, 'time_sample_batch': 6.312370300292969e-05, 'time_algorithm_update': 0.0014854884147644043, 'loss': 0.011654919332358986, 'time_step': 0.0018558263778686523, 'evaluation': -0.49281872803163856} step=41000
2023-01-10 00:17.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_41100.pt
2023-01-10 00:17.07 [info     ] DQN_online_20230110001534: epoch=411 step=41100 epoch=411 metrics={'time_inference': 0.0002424931526184082, 'time_environment_step': 2.7022361755371095e-05, 'time_sample_batch': 6.200551986694336e-05, 'time_algorithm_update': 0.0014389777183532715, 'loss': 0.012785298274829984, 'time_step': 0.00180658340

2023-01-10 00:17.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_42500.pt
2023-01-10 00:17.10 [info     ] DQN_online_20230110001534: epoch=425 step=42500 epoch=425 metrics={'time_inference': 0.0002499842643737793, 'time_environment_step': 2.7306079864501953e-05, 'time_sample_batch': 6.487607955932618e-05, 'time_algorithm_update': 0.0015348577499389648, 'loss': 0.012807022128254176, 'time_step': 0.0019107627868652343, 'rollout_return': 0.034747049055316115, 'evaluation': -0.547708739903263} step=42500
2023-01-10 00:17.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_42600.pt
2023-01-10 00:17.10 [info     ] DQN_online_20230110001534: epoch=426 step=42600 epoch=426 metrics={'time_inference': 0.0002338552474975586, 'time_environment_step': 2.5522708892822265e-05, 'time_sample_batch': 6.0596466064453124e-05, 'time_algorithm_update': 0.0014197683334350585, 'loss': 0.010782958440249785, 'time_step': 0.00177278757

2023-01-10 00:17.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_44000.pt
2023-01-10 00:17.13 [info     ] DQN_online_20230110001534: epoch=440 step=44000 epoch=440 metrics={'time_inference': 0.0002445292472839356, 'time_environment_step': 2.6786327362060546e-05, 'rollout_return': -0.32992689374758316, 'time_sample_batch': 6.342649459838867e-05, 'time_algorithm_update': 0.001439502239227295, 'loss': 0.012682883115485311, 'time_step': 0.0018085169792175294, 'evaluation': 0.3022777294436775} step=44000
2023-01-10 00:17.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_44100.pt
2023-01-10 00:17.13 [info     ] DQN_online_20230110001534: epoch=441 step=44100 epoch=441 metrics={'time_inference': 0.00023468017578125, 'time_environment_step': 2.559185028076172e-05, 'time_sample_batch': 5.976438522338867e-05, 'time_algorithm_update': 0.0014016580581665038, 'loss': 0.01438905709888786, 'time_step': 0.00175490379333496

2023-01-10 00:17.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_45500.pt
2023-01-10 00:17.17 [info     ] DQN_online_20230110001534: epoch=455 step=45500 epoch=455 metrics={'time_inference': 0.00023406982421875, 'time_environment_step': 2.471923828125e-05, 'time_sample_batch': 5.9909820556640625e-05, 'time_algorithm_update': 0.0014200735092163087, 'loss': 0.00977956231450662, 'time_step': 0.0017699003219604492, 'rollout_return': -0.36586565660240233, 'evaluation': -0.791549580502072} step=45500
2023-01-10 00:17.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_45600.pt
2023-01-10 00:17.17 [info     ] DQN_online_20230110001534: epoch=456 step=45600 epoch=456 metrics={'time_inference': 0.0002686905860900879, 'time_environment_step': 3.0934810638427734e-05, 'time_sample_batch': 7.661819458007813e-05, 'time_algorithm_update': 0.0016451454162597656, 'loss': 0.012354176288936288, 'time_step': 0.002057921886444091

2023-01-10 00:17.20 [info     ] DQN_online_20230110001534: epoch=470 step=47000 epoch=470 metrics={'time_inference': 0.00024216413497924806, 'time_environment_step': 2.580881118774414e-05, 'time_sample_batch': 6.021976470947266e-05, 'time_algorithm_update': 0.0014422941207885742, 'loss': 0.010548151572002097, 'time_step': 0.0018031883239746093, 'rollout_return': -0.1119208414964477, 'evaluation': -0.6771367989840391} step=47000
2023-01-10 00:17.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_47100.pt
2023-01-10 00:17.20 [info     ] DQN_online_20230110001534: epoch=471 step=47100 epoch=471 metrics={'time_inference': 0.00023730039596557616, 'time_environment_step': 2.5677680969238282e-05, 'time_sample_batch': 6.343603134155273e-05, 'time_algorithm_update': 0.0014622545242309571, 'loss': 0.010978114960016683, 'time_step': 0.0018216323852539061, 'rollout_return': -0.21819797140038236, 'evaluation': 0.29123737839336555} step=47100
2023-01-10 00:17.20

2023-01-10 00:17.23 [info     ] DQN_online_20230110001534: epoch=485 step=48500 epoch=485 metrics={'time_inference': 0.0002329111099243164, 'time_environment_step': 2.5544166564941406e-05, 'rollout_return': -0.28299969844862144, 'time_sample_batch': 5.8434009552001954e-05, 'time_algorithm_update': 0.0014198637008666993, 'loss': 0.010760823965538293, 'time_step': 0.0017718267440795898, 'evaluation': -0.2939050344140524} step=48500
2023-01-10 00:17.24 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_48600.pt
2023-01-10 00:17.24 [info     ] DQN_online_20230110001534: epoch=486 step=48600 epoch=486 metrics={'time_inference': 0.00023684501647949218, 'time_environment_step': 2.565622329711914e-05, 'time_sample_batch': 6.0205459594726565e-05, 'time_algorithm_update': 0.0014333343505859375, 'loss': 0.012399750916520133, 'time_step': 0.0017899179458618164, 'rollout_return': -0.2560523720562297, 'evaluation': -0.04425311936146351} step=48600
2023-01-10 00:17.

2023-01-10 00:17.27 [info     ] DQN_online_20230110001534: epoch=500 step=50000 epoch=500 metrics={'time_inference': 0.00024280786514282226, 'time_environment_step': 2.7053356170654297e-05, 'time_sample_batch': 6.272554397583007e-05, 'time_algorithm_update': 0.0014754796028137206, 'loss': 0.010965933135012165, 'time_step': 0.0018401432037353515, 'rollout_return': -0.1921576277991853, 'evaluation': -0.08819253138073735} step=50000
2023-01-10 00:17.27 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_50100.pt
2023-01-10 00:17.27 [info     ] DQN_online_20230110001534: epoch=501 step=50100 epoch=501 metrics={'time_inference': 0.0002559089660644531, 'time_environment_step': 2.8779506683349608e-05, 'time_sample_batch': 6.889581680297852e-05, 'time_algorithm_update': 0.0015375423431396485, 'loss': 0.012353627579286694, 'time_step': 0.0019276928901672363, 'rollout_return': -0.29573257876969905, 'evaluation': 0.044561179124279635} step=50100
2023-01-10 00:17.

2023-01-10 00:17.30 [info     ] DQN_online_20230110001534: epoch=515 step=51500 epoch=515 metrics={'time_inference': 0.000244448184967041, 'time_environment_step': 2.6288032531738282e-05, 'time_sample_batch': 6.38437271118164e-05, 'time_algorithm_update': 0.0014786958694458009, 'loss': 0.01115569624118507, 'time_step': 0.0018482351303100587, 'rollout_return': -0.2474408342123967, 'evaluation': -0.4205691062603921} step=51500
2023-01-10 00:17.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_51600.pt
2023-01-10 00:17.30 [info     ] DQN_online_20230110001534: epoch=516 step=51600 epoch=516 metrics={'time_inference': 0.00027265787124633787, 'time_environment_step': 3.103017807006836e-05, 'time_sample_batch': 7.943391799926758e-05, 'time_algorithm_update': 0.0016933989524841309, 'loss': 0.010578344566747546, 'time_step': 0.0021154403686523436, 'rollout_return': -0.20124768057452214, 'evaluation': -0.20132197742283015} step=51600
2023-01-10 00:17.31 [i

2023-01-10 00:17.34 [info     ] DQN_online_20230110001534: epoch=530 step=53000 epoch=530 metrics={'time_inference': 0.00024533987045288085, 'time_environment_step': 2.7399063110351563e-05, 'time_sample_batch': 6.357192993164062e-05, 'time_algorithm_update': 0.0015416073799133301, 'loss': 0.013838646134827286, 'time_step': 0.0019136309623718263, 'rollout_return': -0.2581356402211541, 'evaluation': -0.27354786415695653} step=53000
2023-01-10 00:17.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_53100.pt
2023-01-10 00:17.34 [info     ] DQN_online_20230110001534: epoch=531 step=53100 epoch=531 metrics={'time_inference': 0.0002757692337036133, 'time_environment_step': 3.1995773315429685e-05, 'time_sample_batch': 7.850885391235352e-05, 'time_algorithm_update': 0.0017111825942993165, 'loss': 0.009021298048319295, 'time_step': 0.0021366643905639648, 'rollout_return': -0.01959969087052771, 'evaluation': -0.2340088532904681} step=53100
2023-01-10 00:17.3

2023-01-10 00:17.37 [info     ] DQN_online_20230110001534: epoch=545 step=54500 epoch=545 metrics={'time_inference': 0.0002572798728942871, 'time_environment_step': 2.9666423797607424e-05, 'time_sample_batch': 7.30586051940918e-05, 'time_algorithm_update': 0.0016234827041625977, 'loss': 0.010940531018422916, 'time_step': 0.0020203590393066406, 'rollout_return': 0.1042192685618993, 'evaluation': -0.15141628952081915} step=54500
2023-01-10 00:17.37 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_54600.pt
2023-01-10 00:17.37 [info     ] DQN_online_20230110001534: epoch=546 step=54600 epoch=546 metrics={'time_inference': 0.00024659395217895507, 'time_environment_step': 2.759218215942383e-05, 'time_sample_batch': 6.504535675048828e-05, 'time_algorithm_update': 0.001548027992248535, 'loss': 0.01252071360941045, 'time_step': 0.0019211006164550782, 'rollout_return': -0.17803567341240756, 'evaluation': 0.04851759392023553} step=54600
2023-01-10 00:17.37 [in

2023-01-10 00:17.40 [info     ] DQN_online_20230110001534: epoch=560 step=56000 epoch=560 metrics={'time_inference': 0.0002504420280456543, 'time_environment_step': 2.8243064880371092e-05, 'time_sample_batch': 7.035017013549805e-05, 'time_algorithm_update': 0.0016137981414794922, 'loss': 0.010358670877758413, 'time_step': 0.0019976615905761717, 'rollout_return': -0.23598065089862738, 'evaluation': 0.08278000329565843} step=56000
2023-01-10 00:17.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_56100.pt
2023-01-10 00:17.41 [info     ] DQN_online_20230110001534: epoch=561 step=56100 epoch=561 metrics={'time_inference': 0.0002569794654846191, 'time_environment_step': 2.8269290924072264e-05, 'time_sample_batch': 7.220983505249024e-05, 'time_algorithm_update': 0.0016752123832702638, 'loss': 0.010691522225388326, 'time_step': 0.0020693039894104006, 'rollout_return': -0.1836732879172814, 'evaluation': -0.2548195185214239} step=56100
2023-01-10 00:17.41 

2023-01-10 00:17.44 [info     ] DQN_online_20230110001534: epoch=575 step=57500 epoch=575 metrics={'time_inference': 0.00023216962814331054, 'time_environment_step': 2.5851726531982424e-05, 'time_sample_batch': 6.126642227172852e-05, 'time_algorithm_update': 0.0015787315368652344, 'loss': 0.011750492183491588, 'time_step': 0.0019322848320007325, 'rollout_return': 0.007990050018814787, 'evaluation': 0.03095605273544595} step=57500
2023-01-10 00:17.44 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_57600.pt
2023-01-10 00:17.44 [info     ] DQN_online_20230110001534: epoch=576 step=57600 epoch=576 metrics={'time_inference': 0.00023482561111450194, 'time_environment_step': 2.6662349700927734e-05, 'rollout_return': -0.3132958965162903, 'time_sample_batch': 6.325483322143555e-05, 'time_algorithm_update': 0.0015730905532836915, 'loss': 0.013175865339580923, 'time_step': 0.0019317865371704102, 'evaluation': 0.12306439333443686} step=57600
2023-01-10 00:17.4

2023-01-10 00:17.47 [info     ] DQN_online_20230110001534: epoch=590 step=59000 epoch=590 metrics={'time_inference': 0.00025814294815063477, 'time_environment_step': 3.088235855102539e-05, 'time_sample_batch': 6.997346878051758e-05, 'time_algorithm_update': 0.0016769814491271974, 'loss': 0.010179817838361487, 'time_step': 0.0020722699165344237, 'rollout_return': 0.04000091031494697, 'evaluation': -0.4142114528883033} step=59000
2023-01-10 00:17.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_59100.pt
2023-01-10 00:17.47 [info     ] DQN_online_20230110001534: epoch=591 step=59100 epoch=591 metrics={'time_inference': 0.0002455925941467285, 'time_environment_step': 2.8557777404785157e-05, 'time_sample_batch': 6.845474243164063e-05, 'time_algorithm_update': 0.0016625618934631348, 'loss': 0.009632574629504233, 'time_step': 0.002041583061218262, 'rollout_return': 0.043399021267984596, 'evaluation': -0.2536017306312442} step=59100
2023-01-10 00:17.48 [

2023-01-10 00:17.51 [info     ] DQN_online_20230110001534: epoch=605 step=60500 epoch=605 metrics={'time_inference': 0.0002332901954650879, 'time_environment_step': 2.620697021484375e-05, 'time_sample_batch': 6.0961246490478515e-05, 'time_algorithm_update': 0.0015706300735473632, 'loss': 0.011131261391565204, 'time_step': 0.001923997402191162, 'rollout_return': -0.3112427834251806, 'evaluation': -0.22076442859392348} step=60500
2023-01-10 00:17.51 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_60600.pt
2023-01-10 00:17.51 [info     ] DQN_online_20230110001534: epoch=606 step=60600 epoch=606 metrics={'time_inference': 0.00023370742797851563, 'time_environment_step': 2.6483535766601562e-05, 'time_sample_batch': 6.221771240234375e-05, 'time_algorithm_update': 0.00158919095993042, 'loss': 0.011269952175207437, 'time_step': 0.0019449949264526366, 'rollout_return': -0.3687219086936623, 'evaluation': 0.009646140742737485} step=60600
2023-01-10 00:17.51 [

2023-01-10 00:17.54 [info     ] DQN_online_20230110001534: epoch=620 step=62000 epoch=620 metrics={'time_inference': 0.00023949384689331054, 'time_environment_step': 2.786397933959961e-05, 'time_sample_batch': 6.68787956237793e-05, 'time_algorithm_update': 0.0016389727592468263, 'loss': 0.009029379995772614, 'time_step': 0.0020066046714782714, 'rollout_return': -0.39572631090961413, 'evaluation': 0.19285847159475472} step=62000
2023-01-10 00:17.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_62100.pt
2023-01-10 00:17.54 [info     ] DQN_online_20230110001534: epoch=621 step=62100 epoch=621 metrics={'time_inference': 0.0002383589744567871, 'time_environment_step': 2.6938915252685546e-05, 'time_sample_batch': 6.365776062011719e-05, 'time_algorithm_update': 0.0016440582275390626, 'loss': 0.01082327202660963, 'time_step': 0.002007462978363037, 'rollout_return': 0.028363407927188108, 'evaluation': -0.3683711915804665} step=62100
2023-01-10 00:17.55 [i

2023-01-10 00:17.58 [info     ] DQN_online_20230110001534: epoch=635 step=63500 epoch=635 metrics={'time_inference': 0.00023649930953979493, 'time_environment_step': 2.6488304138183592e-05, 'time_sample_batch': 6.465196609497071e-05, 'time_algorithm_update': 0.0016585707664489747, 'loss': 0.00891771077294834, 'time_step': 0.0020198488235473634, 'rollout_return': 0.0234411209071044, 'evaluation': -0.22791422679210394} step=63500
2023-01-10 00:17.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_63600.pt
2023-01-10 00:17.58 [info     ] DQN_online_20230110001534: epoch=636 step=63600 epoch=636 metrics={'time_inference': 0.00023824691772460937, 'time_environment_step': 2.7730464935302735e-05, 'time_sample_batch': 6.308317184448242e-05, 'time_algorithm_update': 0.0016653299331665038, 'loss': 0.010608821797650308, 'time_step': 0.002029297351837158, 'rollout_return': 0.09968057862261866, 'evaluation': -0.17279940800057333} step=63600
2023-01-10 00:17.58 

2023-01-10 00:18.01 [info     ] DQN_online_20230110001534: epoch=650 step=65000 epoch=650 metrics={'time_inference': 0.0002437400817871094, 'time_environment_step': 2.6733875274658202e-05, 'time_sample_batch': 6.380558013916016e-05, 'time_algorithm_update': 0.00166964054107666, 'loss': 0.009899886548519135, 'time_step': 0.0020379233360290528, 'rollout_return': -0.04374687568507711, 'evaluation': -0.09427385210327692} step=65000
2023-01-10 00:18.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_65100.pt
2023-01-10 00:18.01 [info     ] DQN_online_20230110001534: epoch=651 step=65100 epoch=651 metrics={'time_inference': 0.00023684501647949218, 'time_environment_step': 2.50244140625e-05, 'time_sample_batch': 6.22391700744629e-05, 'time_algorithm_update': 0.0016681504249572753, 'loss': 0.00879364388412796, 'time_step': 0.0020238423347473146, 'rollout_return': -0.13527131698457526, 'evaluation': -0.007355636093085807} step=65100
2023-01-10 00:18.02 [inf

2023-01-10 00:18.05 [info     ] DQN_online_20230110001534: epoch=665 step=66500 epoch=665 metrics={'time_inference': 0.00023302555084228516, 'time_environment_step': 2.635955810546875e-05, 'rollout_return': -0.007494575031779888, 'time_sample_batch': 6.30044937133789e-05, 'time_algorithm_update': 0.0016928386688232421, 'loss': 0.010616370687494055, 'time_step': 0.0020476365089416504, 'evaluation': -0.08201304799400273} step=66500
2023-01-10 00:18.05 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_66600.pt
2023-01-10 00:18.05 [info     ] DQN_online_20230110001534: epoch=666 step=66600 epoch=666 metrics={'time_inference': 0.00023265600204467774, 'time_environment_step': 2.5546550750732423e-05, 'time_sample_batch': 6.754636764526367e-05, 'time_algorithm_update': 0.0016987490653991699, 'loss': 0.010381607129238546, 'time_step': 0.002058689594268799, 'rollout_return': -0.19837929689263042, 'evaluation': 0.3319749720892674} step=66600
2023-01-10 00:18.05

2023-01-10 00:18.08 [info     ] DQN_online_20230110001534: epoch=680 step=68000 epoch=680 metrics={'time_inference': 0.00024180889129638673, 'time_environment_step': 2.9234886169433595e-05, 'time_sample_batch': 7.00831413269043e-05, 'time_algorithm_update': 0.001760721206665039, 'loss': 0.011895843836246058, 'time_step': 0.0021380853652954103, 'rollout_return': -0.4036321914609341, 'evaluation': 0.6927954705387808} step=68000
2023-01-10 00:18.09 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_68100.pt
2023-01-10 00:18.09 [info     ] DQN_online_20230110001534: epoch=681 step=68100 epoch=681 metrics={'time_inference': 0.00024477005004882815, 'time_environment_step': 2.8557777404785157e-05, 'time_sample_batch': 6.551504135131836e-05, 'time_algorithm_update': 0.0017048168182373048, 'loss': 0.009287288734922185, 'time_step': 0.002078464031219482, 'rollout_return': -0.23195201369688503, 'evaluation': -0.28461108184665945} step=68100
2023-01-10 00:18.09 [

2023-01-10 00:18.12 [info     ] DQN_online_20230110001534: epoch=695 step=69500 epoch=695 metrics={'time_inference': 0.0002400493621826172, 'time_environment_step': 2.6514530181884764e-05, 'time_sample_batch': 6.345510482788085e-05, 'time_algorithm_update': 0.0017609357833862304, 'loss': 0.010438033738173544, 'time_step': 0.0021243977546691896, 'rollout_return': -0.23763732794214637, 'evaluation': -0.18168831918378017} step=69500
2023-01-10 00:18.12 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_69600.pt
2023-01-10 00:18.12 [info     ] DQN_online_20230110001534: epoch=696 step=69600 epoch=696 metrics={'time_inference': 0.00024881839752197263, 'time_environment_step': 2.7587413787841795e-05, 'time_sample_batch': 6.444931030273437e-05, 'time_algorithm_update': 0.0017897534370422364, 'loss': 0.011149109480320475, 'time_step': 0.0021656370162963868, 'rollout_return': -0.10087595238132807, 'evaluation': -0.2260646945292842} step=69600
2023-01-10 00:18.

2023-01-10 00:18.16 [info     ] DQN_online_20230110001534: epoch=710 step=71000 epoch=710 metrics={'time_inference': 0.0002455329895019531, 'time_environment_step': 2.833843231201172e-05, 'time_sample_batch': 6.759166717529297e-05, 'time_algorithm_update': 0.0017944502830505372, 'loss': 0.011681652192492039, 'time_step': 0.002169337272644043, 'rollout_return': -0.1777821097205765, 'evaluation': -0.6686480812180738} step=71000
2023-01-10 00:18.16 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_71100.pt
2023-01-10 00:18.16 [info     ] DQN_online_20230110001534: epoch=711 step=71100 epoch=711 metrics={'time_inference': 0.00024194002151489257, 'time_environment_step': 2.666950225830078e-05, 'rollout_return': 0.10872838560982073, 'time_sample_batch': 6.165742874145508e-05, 'time_algorithm_update': 0.0017567753791809082, 'loss': 0.01071174029726535, 'time_step': 0.0021218276023864746, 'evaluation': -0.14976018075070632} step=71100
2023-01-10 00:18.16 [in

2023-01-10 00:18.19 [info     ] DQN_online_20230110001534: epoch=725 step=72500 epoch=725 metrics={'time_inference': 0.00023732900619506835, 'time_environment_step': 2.6469230651855468e-05, 'time_sample_batch': 6.383180618286133e-05, 'time_algorithm_update': 0.001738905906677246, 'loss': 0.011426753411069513, 'time_step': 0.002101230621337891, 'rollout_return': -0.11920688331528961, 'evaluation': -0.2890205928106357} step=72500
2023-01-10 00:18.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_72600.pt
2023-01-10 00:18.20 [info     ] DQN_online_20230110001534: epoch=726 step=72600 epoch=726 metrics={'time_inference': 0.00023077249526977538, 'time_environment_step': 2.6254653930664064e-05, 'time_sample_batch': 6.253957748413086e-05, 'time_algorithm_update': 0.0017269062995910645, 'loss': 0.011088825762271881, 'time_step': 0.002079620361328125, 'rollout_return': -0.39752058293589365, 'evaluation': -0.1857726335430886} step=72600
2023-01-10 00:18.20 

2023-01-10 00:18.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_74100.pt
2023-01-10 00:18.23 [info     ] DQN_online_20230110001534: epoch=741 step=74100 epoch=741 metrics={'time_inference': 0.0002584528923034668, 'time_environment_step': 2.998828887939453e-05, 'time_sample_batch': 7.589340209960937e-05, 'time_algorithm_update': 0.0019400811195373535, 'loss': 0.009604280516505241, 'time_step': 0.0023384189605712893, 'rollout_return': -0.14986127296802698, 'evaluation': -0.17028427704409138} step=74100
2023-01-10 00:18.24 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_74200.pt
2023-01-10 00:18.24 [info     ] DQN_online_20230110001534: epoch=742 step=74200 epoch=742 metrics={'time_inference': 0.0002541208267211914, 'time_environment_step': 2.8002262115478516e-05, 'time_sample_batch': 6.811141967773438e-05, 'time_algorithm_update': 0.0018273305892944336, 'loss': 0.010635033784201369, 'time_step': 0.00221076965

2023-01-10 00:18.27 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_75600.pt
2023-01-10 00:18.27 [info     ] DQN_online_20230110001534: epoch=756 step=75600 epoch=756 metrics={'time_inference': 0.0002469491958618164, 'time_environment_step': 2.6443004608154296e-05, 'time_sample_batch': 6.669282913208008e-05, 'time_algorithm_update': 0.0017792248725891113, 'loss': 0.008915235376334749, 'time_step': 0.002152554988861084, 'rollout_return': -0.15583229558055783, 'evaluation': 0.2953550999862422} step=75600
2023-01-10 00:18.27 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_75700.pt
2023-01-10 00:18.27 [info     ] DQN_online_20230110001534: epoch=757 step=75700 epoch=757 metrics={'time_inference': 0.00026482343673706055, 'time_environment_step': 2.7325153350830078e-05, 'time_sample_batch': 6.705045700073243e-05, 'time_algorithm_update': 0.0017787837982177735, 'loss': 0.009768973623868077, 'time_step': 0.002174227237

2023-01-10 00:18.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_77100.pt
2023-01-10 00:18.31 [info     ] DQN_online_20230110001534: epoch=771 step=77100 epoch=771 metrics={'time_inference': 0.00023783922195434572, 'time_environment_step': 2.6407241821289064e-05, 'time_sample_batch': 6.398439407348633e-05, 'time_algorithm_update': 0.0017708396911621093, 'loss': 0.010746204130118713, 'time_step': 0.002132697105407715, 'rollout_return': -0.4319843340342909, 'evaluation': -0.20897119054051871} step=77100
2023-01-10 00:18.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_77200.pt
2023-01-10 00:18.31 [info     ] DQN_online_20230110001534: epoch=772 step=77200 epoch=772 metrics={'time_inference': 0.0002360677719116211, 'time_environment_step': 2.5432109832763672e-05, 'time_sample_batch': 6.481170654296874e-05, 'time_algorithm_update': 0.0017595648765563964, 'loss': 0.009628096937667578, 'time_step': 0.00211775541

2023-01-10 00:18.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_78600.pt
2023-01-10 00:18.34 [info     ] DQN_online_20230110001534: epoch=786 step=78600 epoch=786 metrics={'time_inference': 0.00023806095123291016, 'time_environment_step': 2.6390552520751953e-05, 'time_sample_batch': 6.581783294677734e-05, 'time_algorithm_update': 0.00174299955368042, 'loss': 0.01045786035945639, 'time_step': 0.00210529088973999, 'rollout_return': -0.15073098013018463, 'evaluation': -0.037720373567288065} step=78600
2023-01-10 00:18.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_78700.pt
2023-01-10 00:18.35 [info     ] DQN_online_20230110001534: epoch=787 step=78700 epoch=787 metrics={'time_inference': 0.00023936748504638672, 'time_environment_step': 2.6657581329345704e-05, 'time_sample_batch': 6.262302398681641e-05, 'time_algorithm_update': 0.001741013526916504, 'loss': 0.009557577249361203, 'time_step': 0.0021037912368

2023-01-10 00:18.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_80100.pt
2023-01-10 00:18.38 [info     ] DQN_online_20230110001534: epoch=801 step=80100 epoch=801 metrics={'time_inference': 0.00024018049240112304, 'time_environment_step': 2.6667118072509764e-05, 'time_sample_batch': 6.438970565795898e-05, 'time_algorithm_update': 0.0017812919616699218, 'loss': 0.010944626003038138, 'time_step': 0.0021441173553466797, 'rollout_return': -0.1267479775757139, 'evaluation': -0.19747425381222633} step=80100
2023-01-10 00:18.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_80200.pt
2023-01-10 00:18.38 [info     ] DQN_online_20230110001534: epoch=802 step=80200 epoch=802 metrics={'time_inference': 0.0002352142333984375, 'time_environment_step': 2.668142318725586e-05, 'time_sample_batch': 6.34455680847168e-05, 'time_algorithm_update': 0.001761801242828369, 'loss': 0.00989719462580979, 'time_step': 0.00211870908737

2023-01-10 00:18.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_81600.pt
2023-01-10 00:18.42 [info     ] DQN_online_20230110001534: epoch=816 step=81600 epoch=816 metrics={'time_inference': 0.00024448156356811525, 'time_environment_step': 2.8584003448486328e-05, 'rollout_return': -0.2928021246828447, 'time_sample_batch': 6.645917892456055e-05, 'time_algorithm_update': 0.0018169784545898438, 'loss': 0.010132444673217833, 'time_step': 0.002188718318939209, 'evaluation': -0.0011342485135222347} step=81600
2023-01-10 00:18.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_81700.pt
2023-01-10 00:18.42 [info     ] DQN_online_20230110001534: epoch=817 step=81700 epoch=817 metrics={'time_inference': 0.0002395486831665039, 'time_environment_step': 2.7327537536621095e-05, 'time_sample_batch': 6.621122360229492e-05, 'time_algorithm_update': 0.0017928099632263184, 'loss': 0.011292055932572112, 'time_step': 0.002157766

2023-01-10 00:18.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_83100.pt
2023-01-10 00:18.46 [info     ] DQN_online_20230110001534: epoch=831 step=83100 epoch=831 metrics={'time_inference': 0.00023789644241333007, 'time_environment_step': 2.5918483734130858e-05, 'time_sample_batch': 6.476402282714843e-05, 'time_algorithm_update': 0.0017737627029418945, 'loss': 0.009671719099860638, 'time_step': 0.0021343612670898437, 'rollout_return': -0.04369593145481779, 'evaluation': -0.4154753682621946} step=83100
2023-01-10 00:18.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_83200.pt
2023-01-10 00:18.46 [info     ] DQN_online_20230110001534: epoch=832 step=83200 epoch=832 metrics={'time_inference': 0.0002338409423828125, 'time_environment_step': 2.645254135131836e-05, 'time_sample_batch': 6.470441818237304e-05, 'time_algorithm_update': 0.0017955446243286134, 'loss': 0.010533086922368966, 'time_step': 0.00215107917

2023-01-10 00:18.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_84600.pt
2023-01-10 00:18.49 [info     ] DQN_online_20230110001534: epoch=846 step=84600 epoch=846 metrics={'time_inference': 0.0002537417411804199, 'time_environment_step': 2.814292907714844e-05, 'time_sample_batch': 7.262229919433594e-05, 'time_algorithm_update': 0.0018611669540405274, 'loss': 0.009780810904921965, 'time_step': 0.0022500061988830564, 'rollout_return': -0.0893769212246929, 'evaluation': -0.2554713801144269} step=84600
2023-01-10 00:18.50 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_84700.pt
2023-01-10 00:18.50 [info     ] DQN_online_20230110001534: epoch=847 step=84700 epoch=847 metrics={'time_inference': 0.00023258924484252929, 'time_environment_step': 2.6316642761230468e-05, 'time_sample_batch': 6.329774856567383e-05, 'time_algorithm_update': 0.00176771879196167, 'loss': 0.009679912570863963, 'time_step': 0.00212051868438

2023-01-10 00:18.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_86100.pt
2023-01-10 00:18.53 [info     ] DQN_online_20230110001534: epoch=861 step=86100 epoch=861 metrics={'time_inference': 0.0002465510368347168, 'time_environment_step': 2.7511119842529297e-05, 'time_sample_batch': 7.261037826538086e-05, 'time_algorithm_update': 0.0018064475059509277, 'loss': 0.008923749147215857, 'time_step': 0.002185354232788086, 'rollout_return': -0.39669833457719994, 'evaluation': -0.07573336422603029} step=86100
2023-01-10 00:18.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_86200.pt
2023-01-10 00:18.53 [info     ] DQN_online_20230110001534: epoch=862 step=86200 epoch=862 metrics={'time_inference': 0.00024778366088867187, 'time_environment_step': 2.7453899383544923e-05, 'time_sample_batch': 6.886005401611328e-05, 'time_algorithm_update': 0.0018515133857727052, 'loss': 0.008618664498208091, 'time_step': 0.0022280240

2023-01-10 00:18.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_87600.pt
2023-01-10 00:18.57 [info     ] DQN_online_20230110001534: epoch=876 step=87600 epoch=876 metrics={'time_inference': 0.0002494120597839355, 'time_environment_step': 2.7570724487304688e-05, 'time_sample_batch': 6.915807723999023e-05, 'time_algorithm_update': 0.0018402171134948731, 'loss': 0.010402878110762686, 'time_step': 0.0022176408767700193, 'rollout_return': -0.224231955079527, 'evaluation': -0.06253827103720562} step=87600
2023-01-10 00:18.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_87700.pt
2023-01-10 00:18.57 [info     ] DQN_online_20230110001534: epoch=877 step=87700 epoch=877 metrics={'time_inference': 0.00023540735244750977, 'time_environment_step': 2.5637149810791016e-05, 'time_sample_batch': 6.243705749511719e-05, 'time_algorithm_update': 0.0017687940597534179, 'loss': 0.009605218035867438, 'time_step': 0.00212342500

2023-01-10 00:19.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_89100.pt
2023-01-10 00:19.00 [info     ] DQN_online_20230110001534: epoch=891 step=89100 epoch=891 metrics={'time_inference': 0.00024158477783203124, 'time_environment_step': 2.8529167175292968e-05, 'rollout_return': -0.38328965957590705, 'time_sample_batch': 6.858110427856445e-05, 'time_algorithm_update': 0.001811518669128418, 'loss': 0.008957345767412335, 'time_step': 0.0021836471557617186, 'evaluation': -0.09168123613473284} step=89100
2023-01-10 00:19.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_89200.pt
2023-01-10 00:19.01 [info     ] DQN_online_20230110001534: epoch=892 step=89200 epoch=892 metrics={'time_inference': 0.0002457261085510254, 'time_environment_step': 3.19218635559082e-05, 'time_sample_batch': 6.689310073852539e-05, 'time_algorithm_update': 0.001780416965484619, 'loss': 0.008565977778052911, 'time_step': 0.0021569657325

2023-01-10 00:19.04 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_90600.pt
2023-01-10 00:19.04 [info     ] DQN_online_20230110001534: epoch=906 step=90600 epoch=906 metrics={'time_inference': 0.00023409605026245117, 'time_environment_step': 2.7120113372802734e-05, 'time_sample_batch': 6.55221939086914e-05, 'time_algorithm_update': 0.001786792278289795, 'loss': 0.009917354376520962, 'time_step': 0.0021453118324279786, 'rollout_return': -0.37325946074463695, 'evaluation': 0.03527557876206526} step=90600
2023-01-10 00:19.04 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_90700.pt
2023-01-10 00:19.04 [info     ] DQN_online_20230110001534: epoch=907 step=90700 epoch=907 metrics={'time_inference': 0.00023561716079711914, 'time_environment_step': 2.6793479919433593e-05, 'time_sample_batch': 6.811380386352539e-05, 'time_algorithm_update': 0.0017740845680236817, 'loss': 0.009635525648482144, 'time_step': 0.00213621139

2023-01-10 00:19.08 [info     ] DQN_online_20230110001534: epoch=921 step=92100 epoch=921 metrics={'time_inference': 0.00024736881256103515, 'time_environment_step': 2.7544498443603516e-05, 'time_sample_batch': 6.871223449707031e-05, 'time_algorithm_update': 0.001832125186920166, 'loss': 0.009079249138012528, 'time_step': 0.0022088003158569334, 'rollout_return': 0.04661942062117843, 'evaluation': -0.18512138536266742} step=92100
2023-01-10 00:19.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_92200.pt
2023-01-10 00:19.08 [info     ] DQN_online_20230110001534: epoch=922 step=92200 epoch=922 metrics={'time_inference': 0.0002414536476135254, 'time_environment_step': 2.6671886444091798e-05, 'time_sample_batch': 6.545543670654297e-05, 'time_algorithm_update': 0.0017953872680664062, 'loss': 0.01109175753314048, 'time_step': 0.0021623110771179198, 'rollout_return': -0.2839658894233911, 'evaluation': -0.675452180890078} step=92200
2023-01-10 00:19.08 [i

2023-01-10 00:19.12 [info     ] DQN_online_20230110001534: epoch=936 step=93600 epoch=936 metrics={'time_inference': 0.00024182796478271483, 'time_environment_step': 2.7632713317871095e-05, 'time_sample_batch': 6.608247756958008e-05, 'time_algorithm_update': 0.0018092894554138184, 'loss': 0.009843543351162226, 'time_step': 0.0021766185760498046, 'rollout_return': -0.672944053936201, 'evaluation': -0.34031074328309846} step=93600
2023-01-10 00:19.12 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_93700.pt
2023-01-10 00:19.12 [info     ] DQN_online_20230110001534: epoch=937 step=93700 epoch=937 metrics={'time_inference': 0.0002411198616027832, 'time_environment_step': 2.8221607208251954e-05, 'time_sample_batch': 6.510019302368164e-05, 'time_algorithm_update': 0.0017986774444580078, 'loss': 0.00900999017059803, 'time_step': 0.002164196968078613, 'rollout_return': -0.022550860743800954, 'evaluation': -0.19381330137360525} step=93700
2023-01-10 00:19.12

2023-01-10 00:19.16 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_95200.pt
2023-01-10 00:19.16 [info     ] DQN_online_20230110001534: epoch=952 step=95200 epoch=952 metrics={'time_inference': 0.00024408578872680664, 'time_environment_step': 2.593517303466797e-05, 'time_sample_batch': 6.554603576660157e-05, 'time_algorithm_update': 0.001856238842010498, 'loss': 0.010127763653872534, 'time_step': 0.00222259521484375, 'rollout_return': -0.2639773096160478, 'evaluation': -0.3965808758011778} step=95200
2023-01-10 00:19.16 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_95300.pt
2023-01-10 00:19.16 [info     ] DQN_online_20230110001534: epoch=953 step=95300 epoch=953 metrics={'time_inference': 0.00025738000869750976, 'time_environment_step': 2.818584442138672e-05, 'time_sample_batch': 6.836652755737305e-05, 'time_algorithm_update': 0.0018764662742614747, 'loss': 0.008590223209466785, 'time_step': 0.002263560295104

2023-01-10 00:19.20 [info     ] DQN_online_20230110001534: epoch=967 step=96700 epoch=967 metrics={'time_inference': 0.0002382969856262207, 'time_environment_step': 2.5413036346435548e-05, 'time_sample_batch': 6.398916244506836e-05, 'time_algorithm_update': 0.0018336606025695801, 'loss': 0.008600681249517947, 'time_step': 0.002191586494445801, 'rollout_return': 0.07691061332776883, 'evaluation': -0.3273033267188584} step=96700
2023-01-10 00:19.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_96800.pt
2023-01-10 00:19.20 [info     ] DQN_online_20230110001534: epoch=968 step=96800 epoch=968 metrics={'time_inference': 0.000262603759765625, 'time_environment_step': 3.111839294433594e-05, 'time_sample_batch': 7.407426834106445e-05, 'time_algorithm_update': 0.0019017386436462403, 'loss': 0.008015007502399385, 'time_step': 0.0023038792610168457, 'rollout_return': -2.125643227731094, 'evaluation': 0.6288837389459989} step=96800
2023-01-10 00:19.20 [info 

2023-01-10 00:19.24 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_98300.pt
2023-01-10 00:19.24 [info     ] DQN_online_20230110001534: epoch=983 step=98300 epoch=983 metrics={'time_inference': 0.0002353954315185547, 'time_environment_step': 2.7024745941162108e-05, 'time_sample_batch': 6.507158279418945e-05, 'time_algorithm_update': 0.0017878198623657226, 'loss': 0.01085068276966922, 'time_step': 0.002146754264831543, 'rollout_return': -0.4273753199240392, 'evaluation': -0.22995945940103663} step=98300
2023-01-10 00:19.24 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_98400.pt
2023-01-10 00:19.24 [info     ] DQN_online_20230110001534: epoch=984 step=98400 epoch=984 metrics={'time_inference': 0.0002429962158203125, 'time_environment_step': 2.760171890258789e-05, 'time_sample_batch': 6.615161895751953e-05, 'time_algorithm_update': 0.001822192668914795, 'loss': 0.010744166299700738, 'time_step': 0.002190864086151

2023-01-10 00:19.27 [info     ] DQN_online_20230110001534: epoch=998 step=99800 epoch=998 metrics={'time_inference': 0.0002514195442199707, 'time_environment_step': 2.9206275939941406e-05, 'time_sample_batch': 6.891965866088867e-05, 'time_algorithm_update': 0.0019275355339050293, 'loss': 0.012710023527033628, 'time_step': 0.0023089718818664552, 'rollout_return': 0.20773287414783148, 'evaluation': -0.3968976577701297} step=99800
2023-01-10 00:19.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_online_20230110001534/model_99900.pt
2023-01-10 00:19.28 [info     ] DQN_online_20230110001534: epoch=999 step=99900 epoch=999 metrics={'time_inference': 0.0002386164665222168, 'time_environment_step': 2.866506576538086e-05, 'time_sample_batch': 6.636381149291993e-05, 'time_algorithm_update': 0.0018741345405578612, 'loss': 0.009352241260930896, 'time_step': 0.0022390007972717283, 'rollout_return': -0.43818730822060004, 'evaluation': 0.3890518766017283} step=99900
2023-01-10 00:19.28 [i

In [None]:
# Reset the environment and keep whatever reset() returns in `observations`
# (presumably the initial observation -- confirm against StocksEnv.reset).
observations = env.reset()


In [None]:
# Build a d3rlpy scorer bound to `env`, apply it to the `dqn` agent and print
# the score it returns. The variable name suggests the scorer reports a mean
# return over episodes; see the d3rlpy docs for the exact averaging.
scorer = evaluate_on_environment(env)
mean_episode_return = scorer(dqn)
print(mean_episode_return)

0.14403840536286852


# Offline Training

In [None]:
# Fresh environment for gathering the offline dataset.
env = StocksEnv(stock_data, bars_count=BARS_COUNT)

# prepare algorithm
dqn = DQN(
    batch_size=32,
    learning_rate=2.5e-4,
    target_update_interval=100,
)

# prepare replay buffer
buffer = ReplayBuffer(maxlen=1000000, env=env)

# The network above is untrained, and without an explorer collect() simply
# follows its greedy policy, which fills the buffer with whatever action the
# random initial weights happen to prefer. With epsilon=1.0 every step takes a
# uniformly random action, so the dataset covers all actions of the environment.
collect_explorer = ConstantEpsilonGreedy(epsilon=1.0)

# start data collection without updates
dqn.collect(env, buffer, explorer=collect_explorer)

# export to MDPDataset
dataset = buffer.to_mdp_dataset()

2023-01-10 00:47.17 [debug    ] Building model...
2023-01-10 00:47.17 [debug    ] Model has been built.


  0%|          | 0/1000000 [00:00<?, ?it/s]

In [None]:
# Divide the dataset's episodes into train (80%) and held-out test (20%) sets.
# A fixed random_state makes the shuffle, and therefore the split, identical on
# every run so that evaluation metrics are comparable between runs.
train_episodes, test_episodes = train_test_split(
    dataset,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Initialize the neural networks from the environment's observation and action
# spaces. build_with_dataset() infers the number of actions from the actions
# recorded in the dataset, which under-sizes the Q-network whenever the data
# does not contain every action; building from `env` keeps the network outputs
# aligned with the actions the environment actually accepts.
dqn.build_with_env(env)



- td_error_scorer : This metric indicates how much the Q-functions overfit to the training set. If the TD error on the held-out evaluation episodes is large, the Q-functions are overfitting.
- average_value_estimation_scorer : This metric indicates the scale of the Q-function estimates. If the average value estimate is too large, the Q-functions are overestimating action-values, which can cause training to fail.
- evaluate_on_environment : Returns a scorer function that evaluates the policy by running it in the environment. The score it produces is the ideal metric for evaluating the resulting policy.

In [None]:
# Metrics computed after every epoch; each key is the name under which the
# value appears in the training log.
offline_scorers = {
    "td_error": td_error_scorer,
    "value_scale": average_value_estimation_scorer,
    # Environment rollouts are scored with epsilon=0.3 passed to the scorer.
    "environment": evaluate_on_environment(env, epsilon=0.3),
}

# Offline training on the collected episodes, scoring on the held-out ones.
dqn.fit(
    train_episodes,
    eval_episodes=test_episodes,
    n_epochs=1000,
    # NOTE(review): the log for this run shows 24029 steps per epoch, so this
    # value appears to have no effect when n_epochs is given -- confirm.
    n_steps_per_epoch=1000,
    scorers=offline_scorers,
    save_metrics=True,
    tensorboard_dir='runs',
)

2023-01-10 00:50.15 [debug    ] RoundIterator is selected.
2023-01-10 00:50.15 [info     ] Directory is created at d3rlpy_logs/DQN_20230110005015
2023-01-10 00:50.15 [info     ] Parameters are saved to d3rlpy_logs/DQN_20230110005015/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.00025, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 'target_update_interval': 100, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (32,), 'action_size': 1}


Epoch 1/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 00:50.57 [info     ] DQN_20230110005015: epoch=1 step=24029 epoch=1 metrics={'time_sample_batch': 6.626483687519447e-05, 'time_algorithm_update': 0.001359952027453699, 'loss': 2.7710212748940768e-08, 'time_step': 0.0016618959068969235, 'td_error': 1.6867854998729193e-10, 'value_scale': -0.0008007722233549895, 'environment': -0.3433559172330668} step=24029
2023-01-10 00:50.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_24029.pt


Epoch 2/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 00:51.43 [info     ] DQN_20230110005015: epoch=2 step=48058 epoch=2 metrics={'time_sample_batch': 7.548423100958514e-05, 'time_algorithm_update': 0.0014980420562637458, 'loss': 5.8395315133830114e-09, 'time_step': 0.0018405191877211835, 'td_error': 3.18049013957448e-11, 'value_scale': -2.7675101344597585e-05, 'environment': -0.29663283507369453} step=48058
2023-01-10 00:51.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_48058.pt


Epoch 3/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 00:52.31 [info     ] DQN_20230110005015: epoch=3 step=72087 epoch=3 metrics={'time_sample_batch': 6.913206114831094e-05, 'time_algorithm_update': 0.0015842252577570098, 'loss': 1.9419152696623557e-09, 'time_step': 0.0019019366828753035, 'td_error': 2.348800924694984e-11, 'value_scale': 9.345512144411509e-05, 'environment': 0.2487191953265734} step=72087
2023-01-10 00:52.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_72087.pt


Epoch 4/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 00:53.20 [info     ] DQN_20230110005015: epoch=4 step=96116 epoch=4 metrics={'time_sample_batch': 6.848309508294339e-05, 'time_algorithm_update': 0.0016661991394981521, 'loss': 9.058478770313852e-10, 'time_step': 0.001974446044551026, 'td_error': 1.6049748950671368e-11, 'value_scale': -9.175152066156785e-05, 'environment': -0.25774756336591303} step=96116
2023-01-10 00:53.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_96116.pt


Epoch 5/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 00:54.11 [info     ] DQN_20230110005015: epoch=5 step=120145 epoch=5 metrics={'time_sample_batch': 7.21301482388468e-05, 'time_algorithm_update': 0.001743142775190651, 'loss': 4.2803234725021444e-10, 'time_step': 0.0020727534494356763, 'td_error': 1.0567051371045103e-11, 'value_scale': 9.300516321363185e-05, 'environment': -0.2729967998441772} step=120145
2023-01-10 00:54.11 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_120145.pt


Epoch 6/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 00:54.59 [info     ] DQN_20230110005015: epoch=6 step=144174 epoch=6 metrics={'time_sample_batch': 6.084668561410029e-05, 'time_algorithm_update': 0.0016616682339718719, 'loss': 2.3216165386354745e-10, 'time_step': 0.001935493763012358, 'td_error': 7.003314456377729e-12, 'value_scale': -2.239951467893055e-06, 'environment': -0.31431543423141683} step=144174
2023-01-10 00:54.59 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_144174.pt


Epoch 7/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 00:55.49 [info     ] DQN_20230110005015: epoch=7 step=168203 epoch=7 metrics={'time_sample_batch': 6.110461107765455e-05, 'time_algorithm_update': 0.0017095370270636472, 'loss': 1.5174975815193508e-10, 'time_step': 0.0019863423468086416, 'td_error': 5.829195292378883e-12, 'value_scale': -9.289063768480889e-05, 'environment': -0.2622659553342874} step=168203
2023-01-10 00:55.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_168203.pt


Epoch 8/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 00:56.39 [info     ] DQN_20230110005015: epoch=8 step=192232 epoch=8 metrics={'time_sample_batch': 6.067608471314427e-05, 'time_algorithm_update': 0.0017185038933668043, 'loss': 1.144396277563259e-10, 'time_step': 0.001989077507688032, 'td_error': 5.502027502279297e-12, 'value_scale': -0.00011261586317416729, 'environment': -0.24946973948609436} step=192232
2023-01-10 00:56.39 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_192232.pt


Epoch 9/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 00:57.29 [info     ] DQN_20230110005015: epoch=9 step=216261 epoch=9 metrics={'time_sample_batch': 6.422257720072214e-05, 'time_algorithm_update': 0.0017334824123554817, 'loss': 8.802139035355762e-11, 'time_step': 0.0020399069120297644, 'td_error': 2.6962023721647297e-12, 'value_scale': 3.515423236890496e-05, 'environment': -0.18637081634447195} step=216261
2023-01-10 00:57.29 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_216261.pt


Epoch 10/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 00:58.19 [info     ] DQN_20230110005015: epoch=10 step=240290 epoch=10 metrics={'time_sample_batch': 6.325680782218576e-05, 'time_algorithm_update': 0.0017176505316657677, 'loss': 7.301941265232923e-11, 'time_step': 0.0020114825535784988, 'td_error': 3.184066604631371e-12, 'value_scale': 8.106930300092969e-05, 'environment': -0.09969313508448638} step=240290
2023-01-10 00:58.19 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_240290.pt


Epoch 11/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 00:59.09 [info     ] DQN_20230110005015: epoch=11 step=264319 epoch=11 metrics={'time_sample_batch': 6.058955392000494e-05, 'time_algorithm_update': 0.0016907150366664593, 'loss': 6.380200661577267e-11, 'time_step': 0.0019849495790718033, 'td_error': 1.5541178880143886e-12, 'value_scale': 7.976209312622047e-06, 'environment': -0.19213887612233063} step=264319
2023-01-10 00:59.09 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_264319.pt


Epoch 12/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:00.00 [info     ] DQN_20230110005015: epoch=12 step=288348 epoch=12 metrics={'time_sample_batch': 6.110047355434997e-05, 'time_algorithm_update': 0.0017528471029317257, 'loss': 5.612753824152192e-11, 'time_step': 0.002041250714113106, 'td_error': 1.3662591817559411e-12, 'value_scale': 2.8015256876114386e-05, 'environment': -0.10190455523503825} step=288348
2023-01-10 01:00.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_288348.pt


Epoch 13/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:00.53 [info     ] DQN_20230110005015: epoch=13 step=312377 epoch=13 metrics={'time_sample_batch': 6.555577253966927e-05, 'time_algorithm_update': 0.0018080560211196882, 'loss': 4.8427207666853575e-11, 'time_step': 0.0021266168986244336, 'td_error': 1.402090014919605e-12, 'value_scale': 4.148083539624311e-05, 'environment': 0.17851702958064952} step=312377
2023-01-10 01:00.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_312377.pt


Epoch 14/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:01.44 [info     ] DQN_20230110005015: epoch=14 step=336406 epoch=14 metrics={'time_sample_batch': 6.470404798814165e-05, 'time_algorithm_update': 0.0017467645567114064, 'loss': 4.52429216714646e-11, 'time_step': 0.002037553961066957, 'td_error': 1.6938261736426965e-12, 'value_scale': -6.319625749333386e-05, 'environment': 0.275528653631233} step=336406
2023-01-10 01:01.44 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_336406.pt


Epoch 15/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:02.35 [info     ] DQN_20230110005015: epoch=15 step=360435 epoch=15 metrics={'time_sample_batch': 6.069379569419617e-05, 'time_algorithm_update': 0.0017522189336261816, 'loss': 4.026959674639616e-11, 'time_step': 0.0020442571357829607, 'td_error': 8.824578706889877e-13, 'value_scale': -3.203151665520026e-05, 'environment': -0.014979824306219141} step=360435
2023-01-10 01:02.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_360435.pt


Epoch 16/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:03.26 [info     ] DQN_20230110005015: epoch=16 step=384464 epoch=16 metrics={'time_sample_batch': 6.163349966812563e-05, 'time_algorithm_update': 0.001763789492742097, 'loss': 3.7377141872312345e-11, 'time_step': 0.0020506647107524666, 'td_error': 2.6452014707494045e-12, 'value_scale': 0.0001154800050722946, 'environment': -0.038992972762908855} step=384464
2023-01-10 01:03.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_384464.pt


Epoch 17/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:04.18 [info     ] DQN_20230110005015: epoch=17 step=408493 epoch=17 metrics={'time_sample_batch': 5.992372025363558e-05, 'time_algorithm_update': 0.0017581931700038328, 'loss': 3.49275250724336e-11, 'time_step': 0.002047582434488695, 'td_error': 6.308539335228196e-13, 'value_scale': -3.263543350789258e-05, 'environment': -0.26729886445856366} step=408493
2023-01-10 01:04.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_408493.pt


Epoch 18/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:05.09 [info     ] DQN_20230110005015: epoch=18 step=432522 epoch=18 metrics={'time_sample_batch': 6.106488291623615e-05, 'time_algorithm_update': 0.0017632520711299427, 'loss': 3.275837119685091e-11, 'time_step': 0.0020492334451968713, 'td_error': 5.244167497012377e-13, 'value_scale': -3.676972671063407e-06, 'environment': -0.19236730250506415} step=432522
2023-01-10 01:05.09 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_432522.pt


Epoch 19/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:06.01 [info     ] DQN_20230110005015: epoch=19 step=456551 epoch=19 metrics={'time_sample_batch': 6.0387172474337854e-05, 'time_algorithm_update': 0.0017949897225238772, 'loss': 3.063659642766603e-11, 'time_step': 0.0020881836430907096, 'td_error': 6.357213672086071e-13, 'value_scale': -4.359177621398539e-05, 'environment': 0.052929377448113414} step=456551
2023-01-10 01:06.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_456551.pt


Epoch 20/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:06.53 [info     ] DQN_20230110005015: epoch=20 step=480580 epoch=20 metrics={'time_sample_batch': 6.141219674298178e-05, 'time_algorithm_update': 0.0017843113305134205, 'loss': 3.020814592390544e-11, 'time_step': 0.002073784000324178, 'td_error': 4.2736824057744653e-13, 'value_scale': -3.571670875131333e-05, 'environment': -0.058085685742931124} step=480580
2023-01-10 01:06.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_480580.pt


Epoch 21/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:07.42 [info     ] DQN_20230110005015: epoch=21 step=504609 epoch=21 metrics={'time_sample_batch': 5.803809113764248e-05, 'time_algorithm_update': 0.0016995808759617337, 'loss': 2.6945038460864874e-11, 'time_step': 0.0019805572260045753, 'td_error': 9.630788964123443e-14, 'value_scale': -1.1468180893731987e-06, 'environment': -0.27188746758913895} step=504609
2023-01-10 01:07.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_504609.pt


Epoch 22/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:08.33 [info     ] DQN_20230110005015: epoch=22 step=528638 epoch=22 metrics={'time_sample_batch': 6.030200101139691e-05, 'time_algorithm_update': 0.0017595076819941406, 'loss': 2.5277149716533846e-11, 'time_step': 0.002042578650729388, 'td_error': 3.750494435641877e-13, 'value_scale': 4.726540031612531e-05, 'environment': 0.07539550155084802} step=528638
2023-01-10 01:08.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_528638.pt


Epoch 23/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:09.23 [info     ] DQN_20230110005015: epoch=23 step=552667 epoch=23 metrics={'time_sample_batch': 5.84955801652847e-05, 'time_algorithm_update': 0.0017233468395894999, 'loss': 2.5494013451347893e-11, 'time_step': 0.0020072687986395256, 'td_error': 4.424873719407185e-14, 'value_scale': 1.5338972661188867e-05, 'environment': -0.34703171336813315} step=552667
2023-01-10 01:09.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_552667.pt


Epoch 24/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:10.13 [info     ] DQN_20230110005015: epoch=24 step=576696 epoch=24 metrics={'time_sample_batch': 5.882402212314478e-05, 'time_algorithm_update': 0.0017323039425283123, 'loss': 2.4873641756041418e-11, 'time_step': 0.002010415320618877, 'td_error': 1.62070514631548e-13, 'value_scale': -3.123410031594741e-05, 'environment': -0.2288401158455032} step=576696
2023-01-10 01:10.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_576696.pt


Epoch 25/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:11.06 [info     ] DQN_20230110005015: epoch=25 step=600725 epoch=25 metrics={'time_sample_batch': 5.994777146824053e-05, 'time_algorithm_update': 0.0017914683329954373, 'loss': 2.4110165243782447e-11, 'time_step': 0.002083154875437683, 'td_error': 2.154132660068383e-13, 'value_scale': 3.6238685380825594e-05, 'environment': -0.49809980084579797} step=600725
2023-01-10 01:11.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_600725.pt


Epoch 26/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:11.58 [info     ] DQN_20230110005015: epoch=26 step=624754 epoch=26 metrics={'time_sample_batch': 6.114762346020918e-05, 'time_algorithm_update': 0.0018006298911026283, 'loss': 2.4272719921258484e-11, 'time_step': 0.002090399242170771, 'td_error': 1.620907631055239e-13, 'value_scale': 3.1327825315773093e-05, 'environment': -0.8402127713705309} step=624754
2023-01-10 01:11.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_624754.pt


Epoch 27/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:12.49 [info     ] DQN_20230110005015: epoch=27 step=648783 epoch=27 metrics={'time_sample_batch': 6.037729004457443e-05, 'time_algorithm_update': 0.001777410824709932, 'loss': 2.5378759587174466e-11, 'time_step': 0.0020684529851054357, 'td_error': 2.0037661836957246e-13, 'value_scale': -3.4960289436207624e-05, 'environment': -0.1263795057662041} step=648783
2023-01-10 01:12.50 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_648783.pt


Epoch 28/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:13.41 [info     ] DQN_20230110005015: epoch=28 step=672812 epoch=28 metrics={'time_sample_batch': 5.936817093146339e-05, 'time_algorithm_update': 0.0017623091722339412, 'loss': 2.4710836940099712e-11, 'time_step': 0.002044007832640154, 'td_error': 6.116543747622485e-13, 'value_scale': -6.157073204124646e-05, 'environment': -0.48854638356308744} step=672812
2023-01-10 01:13.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_672812.pt


Epoch 29/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:14.32 [info     ] DQN_20230110005015: epoch=29 step=696841 epoch=29 metrics={'time_sample_batch': 5.9567000258801865e-05, 'time_algorithm_update': 0.0017627681396572008, 'loss': 2.444467241956602e-11, 'time_step': 0.002050739751732688, 'td_error': 6.730969441997098e-14, 'value_scale': 1.9964209163669976e-05, 'environment': -0.08671122323329875} step=696841
2023-01-10 01:14.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_696841.pt


Epoch 30/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:15.24 [info     ] DQN_20230110005015: epoch=30 step=720870 epoch=30 metrics={'time_sample_batch': 6.0451487644745955e-05, 'time_algorithm_update': 0.001792964012942623, 'loss': 2.5121610107298598e-11, 'time_step': 0.0020828484903486627, 'td_error': 1.9865304183864082e-13, 'value_scale': 3.478108014403703e-05, 'environment': -0.004992148042911426} step=720870
2023-01-10 01:15.24 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_720870.pt


Epoch 31/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:16.17 [info     ] DQN_20230110005015: epoch=31 step=744899 epoch=31 metrics={'time_sample_batch': 6.099584481754753e-05, 'time_algorithm_update': 0.0018181747064404314, 'loss': 2.505871392183176e-11, 'time_step': 0.0021138970208647266, 'td_error': 6.024255446628418e-13, 'value_scale': 6.112022855976179e-05, 'environment': 0.07820154784422569} step=744899
2023-01-10 01:16.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_744899.pt


Epoch 32/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:17.08 [info     ] DQN_20230110005015: epoch=32 step=768928 epoch=32 metrics={'time_sample_batch': 5.966216329480682e-05, 'time_algorithm_update': 0.001776618106995556, 'loss': 2.5165737847845093e-11, 'time_step': 0.002063366956908067, 'td_error': 6.457380466420573e-13, 'value_scale': 6.328112774305957e-05, 'environment': -0.29861786990214895} step=768928
2023-01-10 01:17.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_768928.pt


Epoch 33/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:18.00 [info     ] DQN_20230110005015: epoch=33 step=792957 epoch=33 metrics={'time_sample_batch': 5.9719493293976524e-05, 'time_algorithm_update': 0.001783547297642866, 'loss': 2.4982666699841006e-11, 'time_step': 0.0020747366228960513, 'td_error': 3.150318649881266e-13, 'value_scale': 4.4028247556070125e-05, 'environment': -0.2675108755630782} step=792957
2023-01-10 01:18.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_792957.pt


Epoch 34/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:18.53 [info     ] DQN_20230110005015: epoch=34 step=816986 epoch=34 metrics={'time_sample_batch': 6.003833064138381e-05, 'time_algorithm_update': 0.00181060191759372, 'loss': 2.4569528723925044e-11, 'time_step': 0.002100007702405446, 'td_error': 2.5701309753850546e-14, 'value_scale': 1.195072976344812e-05, 'environment': -0.025895798230313006} step=816986
2023-01-10 01:18.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_816986.pt


Epoch 35/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:19.43 [info     ] DQN_20230110005015: epoch=35 step=841015 epoch=35 metrics={'time_sample_batch': 5.9271152459348235e-05, 'time_algorithm_update': 0.0017322342396477019, 'loss': 2.5010103661746168e-11, 'time_step': 0.0020206326218727717, 'td_error': 9.085354226282463e-14, 'value_scale': 2.3608197841321144e-05, 'environment': -0.13576693298142908} step=841015
2023-01-10 01:19.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_841015.pt


Epoch 36/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:20.35 [info     ] DQN_20230110005015: epoch=36 step=865044 epoch=36 metrics={'time_sample_batch': 5.985519810509526e-05, 'time_algorithm_update': 0.0017766210538646722, 'loss': 2.5406939326706964e-11, 'time_step': 0.002064908645717515, 'td_error': 4.694007751816521e-15, 'value_scale': -4.3024028449291615e-06, 'environment': 0.16869171453421722} step=865044
2023-01-10 01:20.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_865044.pt


Epoch 37/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:21.28 [info     ] DQN_20230110005015: epoch=37 step=889073 epoch=37 metrics={'time_sample_batch': 6.050279491814619e-05, 'time_algorithm_update': 0.0018157257391394506, 'loss': 2.448417431860486e-11, 'time_step': 0.002113425769859091, 'td_error': 1.6244079075897508e-13, 'value_scale': 3.1679469068577694e-05, 'environment': -0.6975458551635304} step=889073
2023-01-10 01:21.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_889073.pt


Epoch 38/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:22.19 [info     ] DQN_20230110005015: epoch=38 step=913102 epoch=38 metrics={'time_sample_batch': 5.998852160783727e-05, 'time_algorithm_update': 0.0017635133205031062, 'loss': 2.4837585768312217e-11, 'time_step': 0.0020498159429921735, 'td_error': 8.411782804284827e-13, 'value_scale': 7.22551785820272e-05, 'environment': 0.05366558872523035} step=913102
2023-01-10 01:22.19 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_913102.pt


Epoch 39/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:23.10 [info     ] DQN_20230110005015: epoch=39 step=937131 epoch=39 metrics={'time_sample_batch': 5.902067850658949e-05, 'time_algorithm_update': 0.0017332719741498066, 'loss': 2.4548313779333598e-11, 'time_step': 0.002024076618878968, 'td_error': 4.513320485949534e-13, 'value_scale': -5.291150853116644e-05, 'environment': -0.07559220115851177} step=937131
2023-01-10 01:23.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_937131.pt


Epoch 40/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:24.02 [info     ] DQN_20230110005015: epoch=40 step=961160 epoch=40 metrics={'time_sample_batch': 5.986725347875244e-05, 'time_algorithm_update': 0.0017946182781055807, 'loss': 2.509929246815836e-11, 'time_step': 0.002085284638272882, 'td_error': 2.558055888667682e-13, 'value_scale': -3.9803631669056943e-05, 'environment': 0.08915780391443269} step=961160
2023-01-10 01:24.02 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_961160.pt


Epoch 41/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:24.54 [info     ] DQN_20230110005015: epoch=41 step=985189 epoch=41 metrics={'time_sample_batch': 5.917076046703271e-05, 'time_algorithm_update': 0.0017790783557449716, 'loss': 2.529596724484415e-11, 'time_step': 0.002075482984474027, 'td_error': 6.628835487011728e-14, 'value_scale': -2.0176597936283658e-05, 'environment': -0.05541614856368708} step=985189
2023-01-10 01:24.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_985189.pt


Epoch 42/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:25.45 [info     ] DQN_20230110005015: epoch=42 step=1009218 epoch=42 metrics={'time_sample_batch': 6.0080102759158896e-05, 'time_algorithm_update': 0.0017843068258717413, 'loss': 2.610892331648262e-11, 'time_step': 0.0020741508210353763, 'td_error': 7.606336031301226e-14, 'value_scale': -2.154065589226186e-05, 'environment': -0.314115679933503} step=1009218
2023-01-10 01:25.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1009218.pt


Epoch 43/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:26.37 [info     ] DQN_20230110005015: epoch=43 step=1033247 epoch=43 metrics={'time_sample_batch': 5.938939434237097e-05, 'time_algorithm_update': 0.0017696235990438962, 'loss': 2.517496413571186e-11, 'time_step': 0.0020652038981898743, 'td_error': 8.765291506099338e-13, 'value_scale': 7.383652571598595e-05, 'environment': -0.2017469538794235} step=1033247
2023-01-10 01:26.37 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1033247.pt


Epoch 44/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:27.28 [info     ] DQN_20230110005015: epoch=44 step=1057276 epoch=44 metrics={'time_sample_batch': 5.9663998886680545e-05, 'time_algorithm_update': 0.0017516978140542894, 'loss': 2.706145887839894e-11, 'time_step': 0.0020395863283895477, 'td_error': 8.980512815871991e-13, 'value_scale': 7.473892182830183e-05, 'environment': -0.14755855238236176} step=1057276
2023-01-10 01:27.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1057276.pt


Epoch 45/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:28.18 [info     ] DQN_20230110005015: epoch=45 step=1081305 epoch=45 metrics={'time_sample_batch': 5.8199672833121645e-05, 'time_algorithm_update': 0.001730458557523886, 'loss': 2.4381015250284657e-11, 'time_step': 0.0020185994707028407, 'td_error': 5.304436360615851e-14, 'value_scale': -1.7980215462571034e-05, 'environment': -0.3190039960950229} step=1081305
2023-01-10 01:28.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1081305.pt


Epoch 46/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:29.10 [info     ] DQN_20230110005015: epoch=46 step=1105334 epoch=46 metrics={'time_sample_batch': 5.94875240917286e-05, 'time_algorithm_update': 0.001786762679122778, 'loss': 2.595787712821892e-11, 'time_step': 0.002077891032959401, 'td_error': 7.170474559801347e-13, 'value_scale': -6.677950653059289e-05, 'environment': -0.33444000280898434} step=1105334
2023-01-10 01:29.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1105334.pt


Epoch 47/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:30.03 [info     ] DQN_20230110005015: epoch=47 step=1129363 epoch=47 metrics={'time_sample_batch': 6.08089517984474e-05, 'time_algorithm_update': 0.0018196400445500505, 'loss': 2.4548847036038878e-11, 'time_step': 0.0021216384857214565, 'td_error': 7.286338570488731e-14, 'value_scale': 2.1174256897329148e-05, 'environment': -0.3962675478648956} step=1129363
2023-01-10 01:30.03 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1129363.pt


Epoch 48/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:30.56 [info     ] DQN_20230110005015: epoch=48 step=1153392 epoch=48 metrics={'time_sample_batch': 6.06237852279204e-05, 'time_algorithm_update': 0.0018013146859369473, 'loss': 2.566035961488588e-11, 'time_step': 0.0020947649245841794, 'td_error': 9.46880839109534e-14, 'value_scale': 2.4164144237741523e-05, 'environment': 0.29167195834377446} step=1153392
2023-01-10 01:30.56 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1153392.pt


Epoch 49/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:31.48 [info     ] DQN_20230110005015: epoch=49 step=1177421 epoch=49 metrics={'time_sample_batch': 5.995788210672338e-05, 'time_algorithm_update': 0.0017794427554593203, 'loss': 2.5042382605130398e-11, 'time_step': 0.0020780268965244113, 'td_error': 1.001112273578263e-14, 'value_scale': -7.6005021027434e-06, 'environment': -0.03128932937466835} step=1177421
2023-01-10 01:31.48 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1177421.pt


Epoch 50/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:32.40 [info     ] DQN_20230110005015: epoch=50 step=1201450 epoch=50 metrics={'time_sample_batch': 6.131590258549788e-05, 'time_algorithm_update': 0.0018230030671493199, 'loss': 2.4468147636670782e-11, 'time_step': 0.002120455481486253, 'td_error': 3.8343088877820667e-13, 'value_scale': 4.8793663154907275e-05, 'environment': -0.1715725866215754} step=1201450
2023-01-10 01:32.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1201450.pt


Epoch 51/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:33.33 [info     ] DQN_20230110005015: epoch=51 step=1225479 epoch=51 metrics={'time_sample_batch': 5.939707406188592e-05, 'time_algorithm_update': 0.0017834988380173995, 'loss': 2.559120872483341e-11, 'time_step': 0.002085079230581153, 'td_error': 4.880769930891253e-13, 'value_scale': -5.507823246849508e-05, 'environment': -0.5092882569640742} step=1225479
2023-01-10 01:33.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1225479.pt


Epoch 52/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:34.23 [info     ] DQN_20230110005015: epoch=52 step=1249508 epoch=52 metrics={'time_sample_batch': 5.929719801971868e-05, 'time_algorithm_update': 0.0017449863543695585, 'loss': 2.4293271894721443e-11, 'time_step': 0.0020355576407999187, 'td_error': 5.288571257172608e-13, 'value_scale': -5.733316819683844e-05, 'environment': 0.007769784175578801} step=1249508
2023-01-10 01:34.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1249508.pt


Epoch 53/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:35.15 [info     ] DQN_20230110005015: epoch=53 step=1273537 epoch=53 metrics={'time_sample_batch': 5.91522557165219e-05, 'time_algorithm_update': 0.0017516298078558974, 'loss': 2.576083113992596e-11, 'time_step': 0.0020503254933742017, 'td_error': 7.281014687263377e-14, 'value_scale': 2.1064610253038515e-05, 'environment': -0.017435464612838094} step=1273537
2023-01-10 01:35.15 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1273537.pt


Epoch 54/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:36.05 [info     ] DQN_20230110005015: epoch=54 step=1297566 epoch=54 metrics={'time_sample_batch': 5.8919849941071574e-05, 'time_algorithm_update': 0.001747299259663164, 'loss': 2.517038538846752e-11, 'time_step': 0.002037156401633462, 'td_error': 1.4077479508720741e-15, 'value_scale': -7.006270988787084e-07, 'environment': -0.0809974373838694} step=1297566
2023-01-10 01:36.05 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1297566.pt


Epoch 55/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:36.58 [info     ] DQN_20230110005015: epoch=55 step=1321595 epoch=55 metrics={'time_sample_batch': 5.9857251983570186e-05, 'time_algorithm_update': 0.0018013044066224543, 'loss': 2.6032412545933494e-11, 'time_step': 0.0021074430401482294, 'td_error': 3.4662544342088153e-13, 'value_scale': 4.642040396940627e-05, 'environment': -0.21551527376767} step=1321595
2023-01-10 01:36.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1321595.pt


Epoch 56/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:37.51 [info     ] DQN_20230110005015: epoch=56 step=1345624 epoch=56 metrics={'time_sample_batch': 6.199325583113967e-05, 'time_algorithm_update': 0.0018317562804804931, 'loss': 2.644829560792128e-11, 'time_step': 0.002134745157416371, 'td_error': 7.346129800093213e-14, 'value_scale': -2.1249020899423654e-05, 'environment': -0.5457278833715893} step=1345624
2023-01-10 01:37.51 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1345624.pt


Epoch 57/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:38.43 [info     ] DQN_20230110005015: epoch=57 step=1369653 epoch=57 metrics={'time_sample_batch': 5.867149932161541e-05, 'time_algorithm_update': 0.001777119035056837, 'loss': 2.480957403333743e-11, 'time_step': 0.0020763819184641233, 'td_error': 1.3262079074772325e-13, 'value_scale': 2.856278512778176e-05, 'environment': -0.05420316573338283} step=1369653
2023-01-10 01:38.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1369653.pt


Epoch 58/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:39.35 [info     ] DQN_20230110005015: epoch=58 step=1393682 epoch=58 metrics={'time_sample_batch': 6.087425918067914e-05, 'time_algorithm_update': 0.0017738484072225848, 'loss': 2.4838866246677902e-11, 'time_step': 0.0020706364067560566, 'td_error': 1.6486316250255554e-15, 'value_scale': -2.287040160212958e-06, 'environment': -0.07599276866453544} step=1393682
2023-01-10 01:39.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1393682.pt


Epoch 59/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:40.28 [info     ] DQN_20230110005015: epoch=59 step=1417711 epoch=59 metrics={'time_sample_batch': 6.085957444568932e-05, 'time_algorithm_update': 0.0018237409750825582, 'loss': 2.5031705202884817e-11, 'time_step': 0.002132261165038016, 'td_error': 2.442724928633421e-13, 'value_scale': -3.89226311437854e-05, 'environment': -0.006559984689625617} step=1417711
2023-01-10 01:40.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1417711.pt


Epoch 60/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:41.21 [info     ] DQN_20230110005015: epoch=60 step=1441740 epoch=60 metrics={'time_sample_batch': 6.082268401008653e-05, 'time_algorithm_update': 0.0018214745350686535, 'loss': 2.5216445357979646e-11, 'time_step': 0.0021252308183952865, 'td_error': 9.863118791450113e-14, 'value_scale': 2.463050431826353e-05, 'environment': 0.027580503644607707} step=1441740
2023-01-10 01:41.21 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1441740.pt


Epoch 61/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:42.14 [info     ] DQN_20230110005015: epoch=61 step=1465769 epoch=61 metrics={'time_sample_batch': 5.9393313579074334e-05, 'time_algorithm_update': 0.0017989113703019719, 'loss': 2.5246523569391225e-11, 'time_step': 0.002104706629081941, 'td_error': 3.8825512332752336e-13, 'value_scale': -4.9121051503516586e-05, 'environment': -0.1561272187080851} step=1465769
2023-01-10 01:42.14 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1465769.pt


Epoch 62/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:43.07 [info     ] DQN_20230110005015: epoch=62 step=1489798 epoch=62 metrics={'time_sample_batch': 5.972831405708865e-05, 'time_algorithm_update': 0.0017895018783042908, 'loss': 2.6081022736288007e-11, 'time_step': 0.0020885098525719663, 'td_error': 3.4810646851239045e-13, 'value_scale': -4.643784550198681e-05, 'environment': -0.11663009815872638} step=1489798
2023-01-10 01:43.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1489798.pt


Epoch 63/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:43.58 [info     ] DQN_20230110005015: epoch=63 step=1513827 epoch=63 metrics={'time_sample_batch': 5.8443508888780266e-05, 'time_algorithm_update': 0.0017492149726416498, 'loss': 2.546197481785301e-11, 'time_step': 0.002050661118945665, 'td_error': 4.2678608519713914e-14, 'value_scale': -1.6110177740254453e-05, 'environment': -0.373972557988733} step=1513827
2023-01-10 01:43.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1513827.pt


Epoch 64/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:44.49 [info     ] DQN_20230110005015: epoch=64 step=1537856 epoch=64 metrics={'time_sample_batch': 5.865414553682001e-05, 'time_algorithm_update': 0.0017575089903408443, 'loss': 2.5947016497267905e-11, 'time_step': 0.002056045852512539, 'td_error': 6.121657210941541e-13, 'value_scale': 6.168144997465729e-05, 'environment': -0.5115080068792809} step=1537856
2023-01-10 01:44.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1537856.pt


Epoch 65/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:45.40 [info     ] DQN_20230110005015: epoch=65 step=1561885 epoch=65 metrics={'time_sample_batch': 5.8367376475552656e-05, 'time_algorithm_update': 0.001747597935266315, 'loss': 2.4378187840880734e-11, 'time_step': 0.00204593800214491, 'td_error': 1.0408831229865586e-13, 'value_scale': -2.534817624524082e-05, 'environment': -0.08690118194292418} step=1561885
2023-01-10 01:45.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1561885.pt


Epoch 66/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:46.33 [info     ] DQN_20230110005015: epoch=66 step=1585914 epoch=66 metrics={'time_sample_batch': 6.135432102730909e-05, 'time_algorithm_update': 0.0018176863695471925, 'loss': 2.4406004443061178e-11, 'time_step': 0.002120389727608701, 'td_error': 9.925419864879937e-13, 'value_scale': 7.858134645464842e-05, 'environment': 0.048494698989627535} step=1585914
2023-01-10 01:46.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1585914.pt


Epoch 67/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:47.26 [info     ] DQN_20230110005015: epoch=67 step=1609943 epoch=67 metrics={'time_sample_batch': 6.078088212595673e-05, 'time_algorithm_update': 0.001804335643510018, 'loss': 2.6242750763648717e-11, 'time_step': 0.0021160723956774583, 'td_error': 4.735406226396726e-13, 'value_scale': -5.426598092124657e-05, 'environment': -0.5090812501442341} step=1609943
2023-01-10 01:47.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1609943.pt


Epoch 68/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:48.18 [info     ] DQN_20230110005015: epoch=68 step=1633972 epoch=68 metrics={'time_sample_batch': 5.989983771504066e-05, 'time_algorithm_update': 0.0017855616761650941, 'loss': 2.5860092573599668e-11, 'time_step': 0.0020822309079432766, 'td_error': 7.204283351281155e-16, 'value_scale': -5.92712719770427e-07, 'environment': -0.04017033655503969} step=1633972
2023-01-10 01:48.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1633972.pt


Epoch 69/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:49.09 [info     ] DQN_20230110005015: epoch=69 step=1658001 epoch=69 metrics={'time_sample_batch': 5.811792450097226e-05, 'time_algorithm_update': 0.0017459579182651447, 'loss': 2.470259643897795e-11, 'time_step': 0.00204664006146496, 'td_error': 2.1940394857625047e-14, 'value_scale': 1.1421978014487889e-05, 'environment': -0.10031282072024394} step=1658001
2023-01-10 01:49.09 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1658001.pt


Epoch 70/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:50.01 [info     ] DQN_20230110005015: epoch=70 step=1682030 epoch=70 metrics={'time_sample_batch': 5.946207385845233e-05, 'time_algorithm_update': 0.001775248943978002, 'loss': 2.537953857178829e-11, 'time_step': 0.00207531075634568, 'td_error': 1.948989457199778e-15, 'value_scale': 2.7687544479976626e-06, 'environment': -0.06920615868243532} step=1682030
2023-01-10 01:50.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1682030.pt


Epoch 71/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:50.52 [info     ] DQN_20230110005015: epoch=71 step=1706059 epoch=71 metrics={'time_sample_batch': 5.8680161330835756e-05, 'time_algorithm_update': 0.0017517324025584614, 'loss': 2.5717459491647544e-11, 'time_step': 0.002055478376804247, 'td_error': 6.717294589035116e-13, 'value_scale': -6.46090454158439e-05, 'environment': -0.19693023393336762} step=1706059
2023-01-10 01:50.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1706059.pt


Epoch 72/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:51.45 [info     ] DQN_20230110005015: epoch=72 step=1730088 epoch=72 metrics={'time_sample_batch': 6.04736834232407e-05, 'time_algorithm_update': 0.0018058151801708332, 'loss': 2.5035877107001408e-11, 'time_step': 0.002111261626884203, 'td_error': 3.1413236124103287e-13, 'value_scale': 4.416890286595877e-05, 'environment': -0.4823748296034589} step=1730088
2023-01-10 01:51.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1730088.pt


Epoch 73/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:52.38 [info     ] DQN_20230110005015: epoch=73 step=1754117 epoch=73 metrics={'time_sample_batch': 6.038798608803323e-05, 'time_algorithm_update': 0.0018085501624520955, 'loss': 2.598822949738561e-11, 'time_step': 0.0021204256059882435, 'td_error': 2.315721289199896e-13, 'value_scale': 3.7885711472098454e-05, 'environment': -0.14854440233612468} step=1754117
2023-01-10 01:52.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1754117.pt


Epoch 74/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:53.30 [info     ] DQN_20230110005015: epoch=74 step=1778146 epoch=74 metrics={'time_sample_batch': 6.076849932239775e-05, 'time_algorithm_update': 0.0017884761396431339, 'loss': 2.4983345088610656e-11, 'time_step': 0.002091999282957567, 'td_error': 2.2201324979046953e-14, 'value_scale': 1.1568313693356186e-05, 'environment': 0.2039389525890424} step=1778146
2023-01-10 01:53.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1778146.pt


Epoch 75/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:54.23 [info     ] DQN_20230110005015: epoch=75 step=1802175 epoch=75 metrics={'time_sample_batch': 6.16684751349088e-05, 'time_algorithm_update': 0.0018127296960052715, 'loss': 2.553203417060022e-11, 'time_step': 0.0021292755004395122, 'td_error': 6.935495776113686e-14, 'value_scale': 2.067594090566728e-05, 'environment': -0.628212132312732} step=1802175
2023-01-10 01:54.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1802175.pt


Epoch 76/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:55.16 [info     ] DQN_20230110005015: epoch=76 step=1826204 epoch=76 metrics={'time_sample_batch': 6.0861529102981886e-05, 'time_algorithm_update': 0.0018083287305394144, 'loss': 2.5707058841256495e-11, 'time_step': 0.0021182385627644663, 'td_error': 1.6459815436733712e-12, 'value_scale': -0.00010120899009608041, 'environment': 0.1444786375009734} step=1826204
2023-01-10 01:55.16 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1826204.pt


Epoch 77/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:56.09 [info     ] DQN_20230110005015: epoch=77 step=1850233 epoch=77 metrics={'time_sample_batch': 5.92156977405252e-05, 'time_algorithm_update': 0.0017767033578354428, 'loss': 2.5192856222160362e-11, 'time_step': 0.0020885718261224708, 'td_error': 1.5418689803175847e-13, 'value_scale': 3.08118210633098e-05, 'environment': 0.07443232786952596} step=1850233
2023-01-10 01:56.09 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1850233.pt


Epoch 78/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:57.00 [info     ] DQN_20230110005015: epoch=78 step=1874262 epoch=78 metrics={'time_sample_batch': 5.907678808521612e-05, 'time_algorithm_update': 0.0017534477681255185, 'loss': 2.417594164368989e-11, 'time_step': 0.002060314000503684, 'td_error': 1.771691881418871e-13, 'value_scale': -3.3149444300355056e-05, 'environment': -0.010417153124837398} step=1874262
2023-01-10 01:57.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1874262.pt


Epoch 79/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:57.53 [info     ] DQN_20230110005015: epoch=79 step=1898291 epoch=79 metrics={'time_sample_batch': 5.8678444804380865e-05, 'time_algorithm_update': 0.0017910146838275526, 'loss': 2.522319925536878e-11, 'time_step': 0.002103060728264657, 'td_error': 7.798829063830364e-13, 'value_scale': -6.964886005112237e-05, 'environment': -0.17322318898165137} step=1898291
2023-01-10 01:57.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1898291.pt


Epoch 80/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:58.47 [info     ] DQN_20230110005015: epoch=80 step=1922320 epoch=80 metrics={'time_sample_batch': 6.120600520391194e-05, 'time_algorithm_update': 0.001835507456345169, 'loss': 2.5081149664712637e-11, 'time_step': 0.002150842921108465, 'td_error': 3.691176850000283e-13, 'value_scale': -4.78659878129028e-05, 'environment': 0.1936275985546833} step=1922320
2023-01-10 01:58.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1922320.pt


Epoch 81/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 01:59.38 [info     ] DQN_20230110005015: epoch=81 step=1946349 epoch=81 metrics={'time_sample_batch': 5.939775868804423e-05, 'time_algorithm_update': 0.0017595891822733342, 'loss': 2.4099044226172625e-11, 'time_step': 0.002072251787137646, 'td_error': 1.0754475141082303e-13, 'value_scale': -2.5729036056569392e-05, 'environment': -0.1257620249515976} step=1946349
2023-01-10 01:59.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1946349.pt


Epoch 82/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:00.31 [info     ] DQN_20230110005015: epoch=82 step=1970378 epoch=82 metrics={'time_sample_batch': 5.998159596930829e-05, 'time_algorithm_update': 0.001775873958049947, 'loss': 2.5897021010980873e-11, 'time_step': 0.0020887304411045973, 'td_error': 2.5583662514175174e-12, 'value_scale': -0.00012619625851779267, 'environment': -0.16998094526716837} step=1970378
2023-01-10 02:00.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1970378.pt


Epoch 83/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:01.23 [info     ] DQN_20230110005015: epoch=83 step=1994407 epoch=83 metrics={'time_sample_batch': 5.977113791939679e-05, 'time_algorithm_update': 0.001788311640844893, 'loss': 2.4461012767719566e-11, 'time_step': 0.002107680327605852, 'td_error': 8.823938490468474e-15, 'value_scale': 7.050418618619992e-06, 'environment': -0.17080244228781613} step=1994407
2023-01-10 02:01.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_1994407.pt


Epoch 84/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:02.16 [info     ] DQN_20230110005015: epoch=84 step=2018436 epoch=84 metrics={'time_sample_batch': 6.098408710743744e-05, 'time_algorithm_update': 0.0017968502089922593, 'loss': 2.4134572919361912e-11, 'time_step': 0.002113688696070236, 'td_error': 5.2758871635951145e-14, 'value_scale': 1.798880166269048e-05, 'environment': -0.6039288546369824} step=2018436
2023-01-10 02:02.16 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2018436.pt


Epoch 85/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:03.10 [info     ] DQN_20230110005015: epoch=85 step=2042465 epoch=85 metrics={'time_sample_batch': 6.105911816554081e-05, 'time_algorithm_update': 0.001820383419570437, 'loss': 2.5300841379122252e-11, 'time_step': 0.002143187550471672, 'td_error': 2.3985591248505735e-13, 'value_scale': 3.8540826712966375e-05, 'environment': -0.2530298671653751} step=2042465
2023-01-10 02:03.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2042465.pt


Epoch 86/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:04.01 [info     ] DQN_20230110005015: epoch=86 step=2066494 epoch=86 metrics={'time_sample_batch': 6.0582548904530075e-05, 'time_algorithm_update': 0.0017603007271384185, 'loss': 2.4568961008242887e-11, 'time_step': 0.0020724918924768477, 'td_error': 3.039655662229501e-13, 'value_scale': 4.3412904921754375e-05, 'environment': -0.24851183468471022} step=2066494
2023-01-10 02:04.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2066494.pt


Epoch 87/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:04.53 [info     ] DQN_20230110005015: epoch=87 step=2090523 epoch=87 metrics={'time_sample_batch': 5.8294528283461276e-05, 'time_algorithm_update': 0.0017573459004833931, 'loss': 2.5445175525098366e-11, 'time_step': 0.0020735690177883506, 'td_error': 9.189773514276328e-13, 'value_scale': 7.560348728462717e-05, 'environment': -0.6219751300670449} step=2090523
2023-01-10 02:04.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2090523.pt


Epoch 88/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:05.45 [info     ] DQN_20230110005015: epoch=88 step=2114552 epoch=88 metrics={'time_sample_batch': 5.984946312075464e-05, 'time_algorithm_update': 0.0017737710246224594, 'loss': 2.5224198074459036e-11, 'time_step': 0.0020852001415139812, 'td_error': 1.1484269094314826e-14, 'value_scale': -8.136320803478882e-06, 'environment': -0.49048045126234713} step=2114552
2023-01-10 02:05.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2114552.pt


Epoch 89/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:06.36 [info     ] DQN_20230110005015: epoch=89 step=2138581 epoch=89 metrics={'time_sample_batch': 5.8218098206686575e-05, 'time_algorithm_update': 0.001749005635791099, 'loss': 2.5965827956678733e-11, 'time_step': 0.0020611722637311294, 'td_error': 6.283131665711159e-13, 'value_scale': 6.248493297304669e-05, 'environment': -0.29512703264353046} step=2138581
2023-01-10 02:06.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2138581.pt


Epoch 90/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:07.29 [info     ] DQN_20230110005015: epoch=90 step=2162610 epoch=90 metrics={'time_sample_batch': 6.132750154171619e-05, 'time_algorithm_update': 0.0017878195037803696, 'loss': 2.436119103503592e-11, 'time_step': 0.002102107847717709, 'td_error': 2.7870987459886194e-13, 'value_scale': -4.147498724938879e-05, 'environment': -0.35989760791303205} step=2162610
2023-01-10 02:07.29 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2162610.pt


Epoch 91/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:08.21 [info     ] DQN_20230110005015: epoch=91 step=2186639 epoch=91 metrics={'time_sample_batch': 6.002591807147012e-05, 'time_algorithm_update': 0.0017900415026266935, 'loss': 2.501472830913986e-11, 'time_step': 0.0021096845161126514, 'td_error': 9.019587290786307e-13, 'value_scale': -7.486691081503645e-05, 'environment': -0.516915875721826} step=2186639
2023-01-10 02:08.21 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2186639.pt


Epoch 92/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:09.15 [info     ] DQN_20230110005015: epoch=92 step=2210668 epoch=92 metrics={'time_sample_batch': 6.211345237145497e-05, 'time_algorithm_update': 0.0018168436245126686, 'loss': 2.546799944802476e-11, 'time_step': 0.0021354168153661455, 'td_error': 2.1980201803254768e-13, 'value_scale': -3.687397400983703e-05, 'environment': -0.29091503937256996} step=2210668
2023-01-10 02:09.15 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2210668.pt


Epoch 93/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:10.08 [info     ] DQN_20230110005015: epoch=93 step=2234697 epoch=93 metrics={'time_sample_batch': 6.006010969091263e-05, 'time_algorithm_update': 0.0018000516895845225, 'loss': 2.4974475522343632e-11, 'time_step': 0.0021212154562104493, 'td_error': 4.3949132594252034e-13, 'value_scale': -5.216207497424624e-05, 'environment': -0.08796438923077447} step=2234697
2023-01-10 02:10.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2234697.pt


Epoch 94/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:11.01 [info     ] DQN_20230110005015: epoch=94 step=2258726 epoch=94 metrics={'time_sample_batch': 6.004153548557415e-05, 'time_algorithm_update': 0.0018106039516279583, 'loss': 2.5219642191751573e-11, 'time_step': 0.0021284434812148018, 'td_error': 4.3596519465867907e-13, 'value_scale': -5.201363355540074e-05, 'environment': -0.15001191764325109} step=2258726
2023-01-10 02:11.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2258726.pt


Epoch 95/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:11.54 [info     ] DQN_20230110005015: epoch=95 step=2282755 epoch=95 metrics={'time_sample_batch': 6.029312071557537e-05, 'time_algorithm_update': 0.0017894752175725897, 'loss': 2.6053851779368516e-11, 'time_step': 0.002110585940554425, 'td_error': 2.5654163503575713e-13, 'value_scale': -3.9889131581983594e-05, 'environment': -0.21837941227024743} step=2282755
2023-01-10 02:11.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2282755.pt


Epoch 96/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:12.48 [info     ] DQN_20230110005015: epoch=96 step=2306784 epoch=96 metrics={'time_sample_batch': 6.061249385736742e-05, 'time_algorithm_update': 0.0018330010002911266, 'loss': 2.5902480763508032e-11, 'time_step': 0.0021540543635074376, 'td_error': 2.7415348236653172e-14, 'value_scale': 1.2871815391483831e-05, 'environment': -0.32765443402253613} step=2306784
2023-01-10 02:12.48 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2306784.pt


Epoch 97/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:13.41 [info     ] DQN_20230110005015: epoch=97 step=2330813 epoch=97 metrics={'time_sample_batch': 5.996884604737456e-05, 'time_algorithm_update': 0.0018215397233854664, 'loss': 2.587730620379447e-11, 'time_step': 0.0021455252412166323, 'td_error': 2.930867152646256e-14, 'value_scale': -1.3319955166668467e-05, 'environment': -0.32541792691588844} step=2330813
2023-01-10 02:13.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2330813.pt


Epoch 98/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:14.33 [info     ] DQN_20230110005015: epoch=98 step=2354842 epoch=98 metrics={'time_sample_batch': 5.8484943654535315e-05, 'time_algorithm_update': 0.0017580708005196235, 'loss': 2.443924683591231e-11, 'time_step': 0.002069927530940778, 'td_error': 2.1516221529117164e-15, 'value_scale': 2.752959948026226e-06, 'environment': -0.46628287091150283} step=2354842
2023-01-10 02:14.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2354842.pt


Epoch 99/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:15.26 [info     ] DQN_20230110005015: epoch=99 step=2378871 epoch=99 metrics={'time_sample_batch': 6.063458049256156e-05, 'time_algorithm_update': 0.0018111457687384914, 'loss': 2.4621745821628402e-11, 'time_step': 0.002134443167825729, 'td_error': 1.4308985203947591e-13, 'value_scale': 2.9784593027257853e-05, 'environment': -0.10347028984939313} step=2378871
2023-01-10 02:15.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2378871.pt


Epoch 100/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:16.20 [info     ] DQN_20230110005015: epoch=100 step=2402900 epoch=100 metrics={'time_sample_batch': 6.092163729525777e-05, 'time_algorithm_update': 0.0018246731481687461, 'loss': 2.5860930216788466e-11, 'time_step': 0.00214488001578984, 'td_error': 1.1160298301670978e-15, 'value_scale': 4.096072826452265e-07, 'environment': -0.4538481563960072} step=2402900
2023-01-10 02:16.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2402900.pt


Epoch 101/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:17.13 [info     ] DQN_20230110005015: epoch=101 step=2426929 epoch=101 metrics={'time_sample_batch': 6.069743711158892e-05, 'time_algorithm_update': 0.001804339890176623, 'loss': 2.534878682570453e-11, 'time_step': 0.002128750511241508, 'td_error': 4.209562859280738e-14, 'value_scale': 1.607753022882376e-05, 'environment': -0.1470839228955343} step=2426929
2023-01-10 02:17.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2426929.pt


Epoch 102/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:18.06 [info     ] DQN_20230110005015: epoch=102 step=2450958 epoch=102 metrics={'time_sample_batch': 5.990491783957767e-05, 'time_algorithm_update': 0.001800870452859269, 'loss': 2.434345212698397e-11, 'time_step': 0.0021185478748783665, 'td_error': 4.18597615263392e-15, 'value_scale': 4.886396641508733e-06, 'environment': -0.23625697661916673} step=2450958
2023-01-10 02:18.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2450958.pt


Epoch 103/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:18.58 [info     ] DQN_20230110005015: epoch=103 step=2474987 epoch=103 metrics={'time_sample_batch': 5.9599544806617134e-05, 'time_algorithm_update': 0.0017782236148695001, 'loss': 2.5465864960476413e-11, 'time_step': 0.0020989177577171816, 'td_error': 1.317423447028997e-12, 'value_scale': 9.055783662650899e-05, 'environment': 0.14403339863070758} step=2474987
2023-01-10 02:18.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2474987.pt


Epoch 104/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:19.51 [info     ] DQN_20230110005015: epoch=104 step=2499016 epoch=104 metrics={'time_sample_batch': 6.034768244375713e-05, 'time_algorithm_update': 0.0018014446359194889, 'loss': 2.6241945405536026e-11, 'time_step': 0.0021211215632055786, 'td_error': 7.212558457870563e-13, 'value_scale': 6.699163396546535e-05, 'environment': 0.006399737606021166} step=2499016
2023-01-10 02:19.51 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2499016.pt


Epoch 105/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:20.45 [info     ] DQN_20230110005015: epoch=105 step=2523045 epoch=105 metrics={'time_sample_batch': 6.0589742440251434e-05, 'time_algorithm_update': 0.0018194575172629744, 'loss': 2.6068284894652334e-11, 'time_step': 0.0021526828489478616, 'td_error': 1.301373003904876e-12, 'value_scale': 8.99969613643491e-05, 'environment': -0.4130243928165676} step=2523045
2023-01-10 02:20.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2523045.pt


Epoch 106/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:21.39 [info     ] DQN_20230110005015: epoch=106 step=2547074 epoch=106 metrics={'time_sample_batch': 6.277071332766812e-05, 'time_algorithm_update': 0.001837182428968886, 'loss': 2.55656740562363e-11, 'time_step': 0.0021753775016254114, 'td_error': 6.230348852077777e-15, 'value_scale': -5.952614881045298e-06, 'environment': -0.1714931169622123} step=2547074
2023-01-10 02:21.39 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2547074.pt


Epoch 107/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:22.31 [info     ] DQN_20230110005015: epoch=107 step=2571103 epoch=107 metrics={'time_sample_batch': 5.828079607182215e-05, 'time_algorithm_update': 0.0017605916634893452, 'loss': 2.413977512751774e-11, 'time_step': 0.002079254232783895, 'td_error': 1.5323002321822364e-14, 'value_scale': 9.596421964871489e-06, 'environment': -0.20077530444192232} step=2571103
2023-01-10 02:22.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2571103.pt


Epoch 108/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:23.23 [info     ] DQN_20230110005015: epoch=108 step=2595132 epoch=108 metrics={'time_sample_batch': 5.939282739528075e-05, 'time_algorithm_update': 0.0017748697106968901, 'loss': 2.4576096541453335e-11, 'time_step': 0.0020954594232501338, 'td_error': 1.591871254050589e-13, 'value_scale': 3.143699809380749e-05, 'environment': -0.09463523473820386} step=2595132
2023-01-10 02:23.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2595132.pt


Epoch 109/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:24.16 [info     ] DQN_20230110005015: epoch=109 step=2619161 epoch=109 metrics={'time_sample_batch': 6.058676580478053e-05, 'time_algorithm_update': 0.0017971239106238095, 'loss': 2.481820471539426e-11, 'time_step': 0.0021273932646879556, 'td_error': 1.973520438264231e-14, 'value_scale': -1.0873199078673108e-05, 'environment': -0.04948759380556596} step=2619161
2023-01-10 02:24.16 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2619161.pt


Epoch 110/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:25.09 [info     ] DQN_20230110005015: epoch=110 step=2643190 epoch=110 metrics={'time_sample_batch': 5.9157008411157116e-05, 'time_algorithm_update': 0.001770548380151998, 'loss': 2.5199996181174764e-11, 'time_step': 0.0020907275551410942, 'td_error': 5.301336962010322e-13, 'value_scale': 5.742132182821619e-05, 'environment': -0.20607713760048094} step=2643190
2023-01-10 02:25.09 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2643190.pt


Epoch 111/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:26.02 [info     ] DQN_20230110005015: epoch=111 step=2667219 epoch=111 metrics={'time_sample_batch': 5.9771971377328645e-05, 'time_algorithm_update': 0.0017948709051579968, 'loss': 2.4963210953186103e-11, 'time_step': 0.0021207880907337727, 'td_error': 1.5540877078238141e-13, 'value_scale': 3.1046186584854725e-05, 'environment': -0.06618698300610555} step=2667219
2023-01-10 02:26.02 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2667219.pt


Epoch 112/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:26.54 [info     ] DQN_20230110005015: epoch=112 step=2691248 epoch=112 metrics={'time_sample_batch': 5.99463724495692e-05, 'time_algorithm_update': 0.0017906610892999633, 'loss': 2.6120456949656878e-11, 'time_step': 0.002111558953079274, 'td_error': 2.8078459119213613e-13, 'value_scale': 4.177049372161171e-05, 'environment': -0.10146036260324272} step=2691248
2023-01-10 02:26.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2691248.pt


Epoch 113/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:27.47 [info     ] DQN_20230110005015: epoch=113 step=2715277 epoch=113 metrics={'time_sample_batch': 5.9604049448296444e-05, 'time_algorithm_update': 0.001788071257686381, 'loss': 2.5972894834566446e-11, 'time_step': 0.0021132251743947064, 'td_error': 8.002714780453595e-14, 'value_scale': -2.2210490537486904e-05, 'environment': -0.054807213694292124} step=2715277
2023-01-10 02:27.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2715277.pt


Epoch 114/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:28.40 [info     ] DQN_20230110005015: epoch=114 step=2739306 epoch=114 metrics={'time_sample_batch': 6.086465457022634e-05, 'time_algorithm_update': 0.001805372971205275, 'loss': 2.482059539087374e-11, 'time_step': 0.002127153089893926, 'td_error': 6.222959440601345e-13, 'value_scale': -6.22142005130076e-05, 'environment': -0.40488031732125657} step=2739306
2023-01-10 02:28.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2739306.pt


Epoch 115/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:29.34 [info     ] DQN_20230110005015: epoch=115 step=2763335 epoch=115 metrics={'time_sample_batch': 6.075443968085682e-05, 'time_algorithm_update': 0.001825875450923919, 'loss': 2.534503017047321e-11, 'time_step': 0.002156506476119899, 'td_error': 4.63011403532724e-13, 'value_scale': 5.366941869832614e-05, 'environment': -0.46606471741846056} step=2763335
2023-01-10 02:29.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2763335.pt


Epoch 116/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:30.28 [info     ] DQN_20230110005015: epoch=116 step=2787364 epoch=116 metrics={'time_sample_batch': 6.0898667591540593e-05, 'time_algorithm_update': 0.0018179123854784986, 'loss': 2.5279795541696282e-11, 'time_step': 0.002145320379241406, 'td_error': 4.8639786638567734e-14, 'value_scale': -1.7251100671252474e-05, 'environment': -0.14436199382165543} step=2787364
2023-01-10 02:30.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2787364.pt


Epoch 117/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:31.20 [info     ] DQN_20230110005015: epoch=117 step=2811393 epoch=117 metrics={'time_sample_batch': 5.8819785378657854e-05, 'time_algorithm_update': 0.0017660251841115874, 'loss': 2.5665468158484553e-11, 'time_step': 0.002090307670941568, 'td_error': 1.374995911651794e-13, 'value_scale': 2.9207550064353645e-05, 'environment': -0.03986871400432398} step=2811393
2023-01-10 02:31.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2811393.pt


Epoch 118/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:32.13 [info     ] DQN_20230110005015: epoch=118 step=2835422 epoch=118 metrics={'time_sample_batch': 5.922610604255515e-05, 'time_algorithm_update': 0.0017999065488389608, 'loss': 2.504572148649277e-11, 'time_step': 0.0021213459816758484, 'td_error': 5.4057952586986996e-14, 'value_scale': -1.825820267336792e-05, 'environment': -0.40587667651101755} step=2835422
2023-01-10 02:32.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2835422.pt


Epoch 119/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:33.06 [info     ] DQN_20230110005015: epoch=119 step=2859451 epoch=119 metrics={'time_sample_batch': 6.0402303704648306e-05, 'time_algorithm_update': 0.0018193384716883745, 'loss': 2.5632005301380445e-11, 'time_step': 0.002149519766953173, 'td_error': 8.480562389472248e-16, 'value_scale': 8.033929399448766e-07, 'environment': -0.2290164529465098} step=2859451
2023-01-10 02:33.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2859451.pt


Epoch 120/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:34.00 [info     ] DQN_20230110005015: epoch=120 step=2883480 epoch=120 metrics={'time_sample_batch': 6.029488685262144e-05, 'time_algorithm_update': 0.0018151122347246599, 'loss': 2.4483738638897356e-11, 'time_step': 0.002139473413874373, 'td_error': 8.124268127513054e-14, 'value_scale': -2.2420703748514137e-05, 'environment': -0.30376141347863683} step=2883480
2023-01-10 02:34.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2883480.pt


Epoch 121/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:34.53 [info     ] DQN_20230110005015: epoch=121 step=2907509 epoch=121 metrics={'time_sample_batch': 5.997980998802574e-05, 'time_algorithm_update': 0.0017822869504960101, 'loss': 2.533692585653102e-11, 'time_step': 0.002109825043073233, 'td_error': 9.820045190455263e-14, 'value_scale': -2.4654815798148085e-05, 'environment': -0.24653314775827312} step=2907509
2023-01-10 02:34.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2907509.pt


Epoch 122/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:35.46 [info     ] DQN_20230110005015: epoch=122 step=2931538 epoch=122 metrics={'time_sample_batch': 6.033947685197566e-05, 'time_algorithm_update': 0.001796449424870338, 'loss': 2.546430995348095e-11, 'time_step': 0.002118441083119789, 'td_error': 3.7794191153110006e-14, 'value_scale': -1.5249890599029561e-05, 'environment': 0.016242793935581545} step=2931538
2023-01-10 02:35.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2931538.pt


Epoch 123/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:36.38 [info     ] DQN_20230110005015: epoch=123 step=2955567 epoch=123 metrics={'time_sample_batch': 5.981817868195539e-05, 'time_algorithm_update': 0.0017782184256016624, 'loss': 2.532064192174644e-11, 'time_step': 0.0021055950257824704, 'td_error': 6.506241629428992e-13, 'value_scale': -6.362869161681092e-05, 'environment': -0.5004641120557295} step=2955567
2023-01-10 02:36.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2955567.pt


Epoch 124/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:37.30 [info     ] DQN_20230110005015: epoch=124 step=2979596 epoch=124 metrics={'time_sample_batch': 5.9947076919963977e-05, 'time_algorithm_update': 0.0017621528691053638, 'loss': 2.549293496042468e-11, 'time_step': 0.002083671530056369, 'td_error': 7.102392414185257e-15, 'value_scale': -6.26102244861671e-06, 'environment': -0.32765257140431203} step=2979596
2023-01-10 02:37.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_2979596.pt


Epoch 125/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:38.22 [info     ] DQN_20230110005015: epoch=125 step=3003625 epoch=125 metrics={'time_sample_batch': 5.946927731629193e-05, 'time_algorithm_update': 0.001756325946417168, 'loss': 2.563065389945054e-11, 'time_step': 0.002081873741453122, 'td_error': 7.33945305670793e-16, 'value_scale': -8.472721744254983e-07, 'environment': -0.7908854027288659} step=3003625
2023-01-10 02:38.22 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3003625.pt


Epoch 126/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:39.14 [info     ] DQN_20230110005015: epoch=126 step=3027654 epoch=126 metrics={'time_sample_batch': 5.894659997183681e-05, 'time_algorithm_update': 0.001775783537786459, 'loss': 2.4744101261568222e-11, 'time_step': 0.002096471826584381, 'td_error': 8.756890826081673e-15, 'value_scale': -7.168500194004873e-06, 'environment': -0.2930820152327006} step=3027654
2023-01-10 02:39.14 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3027654.pt


Epoch 127/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:40.06 [info     ] DQN_20230110005015: epoch=127 step=3051683 epoch=127 metrics={'time_sample_batch': 5.958743982236877e-05, 'time_algorithm_update': 0.0017605263561071133, 'loss': 2.5019112687684498e-11, 'time_step': 0.0020883421489295353, 'td_error': 5.792125863419627e-15, 'value_scale': 5.7131412684234815e-06, 'environment': -0.1881242051032104} step=3051683
2023-01-10 02:40.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3051683.pt


Epoch 128/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:41.00 [info     ] DQN_20230110005015: epoch=128 step=3075712 epoch=128 metrics={'time_sample_batch': 6.008262297719093e-05, 'time_algorithm_update': 0.0018030499155847137, 'loss': 2.6020445010313278e-11, 'time_step': 0.002129771755183104, 'td_error': 2.4788186411091216e-15, 'value_scale': 3.388100862641195e-06, 'environment': -0.29992408395363523} step=3075712
2023-01-10 02:41.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3075712.pt


Epoch 129/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:41.53 [info     ] DQN_20230110005015: epoch=129 step=3099741 epoch=129 metrics={'time_sample_batch': 5.9700641269327435e-05, 'time_algorithm_update': 0.0018045793605002578, 'loss': 2.595132277160338e-11, 'time_step': 0.0021353328345573926, 'td_error': 3.7627965357042377e-13, 'value_scale': -4.8374945602605435e-05, 'environment': -0.1657140960338897} step=3099741
2023-01-10 02:41.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3099741.pt


Epoch 130/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:42.46 [info     ] DQN_20230110005015: epoch=130 step=3123770 epoch=130 metrics={'time_sample_batch': 6.0395834483558196e-05, 'time_algorithm_update': 0.0017990593289291127, 'loss': 2.5296914584938678e-11, 'time_step': 0.002123635917265737, 'td_error': 6.482024445809066e-13, 'value_scale': -6.349459649294621e-05, 'environment': -0.41631542223397916} step=3123770
2023-01-10 02:42.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3123770.pt


Epoch 131/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:43.38 [info     ] DQN_20230110005015: epoch=131 step=3147799 epoch=131 metrics={'time_sample_batch': 5.890729846150257e-05, 'time_algorithm_update': 0.0017577162633908019, 'loss': 2.4708114678573516e-11, 'time_step': 0.0020838447900850122, 'td_error': 8.159541553306118e-14, 'value_scale': -2.246436599345228e-05, 'environment': 0.20270841238874807} step=3147799
2023-01-10 02:43.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3147799.pt


Epoch 132/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:44.30 [info     ] DQN_20230110005015: epoch=132 step=3171828 epoch=132 metrics={'time_sample_batch': 5.928153099502347e-05, 'time_algorithm_update': 0.0017613966449418611, 'loss': 2.5356748364751838e-11, 'time_step': 0.002084528007302532, 'td_error': 2.7149710306913525e-14, 'value_scale': 1.2842601937408365e-05, 'environment': -0.2421452704561676} step=3171828
2023-01-10 02:44.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3171828.pt


Epoch 133/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:45.23 [info     ] DQN_20230110005015: epoch=133 step=3195857 epoch=133 metrics={'time_sample_batch': 6.029086839473571e-05, 'time_algorithm_update': 0.0017950037722432998, 'loss': 2.547991700899935e-11, 'time_step': 0.0021275904767100217, 'td_error': 5.6002785239418665e-14, 'value_scale': 1.8534497214355526e-05, 'environment': -0.5230656019933908} step=3195857
2023-01-10 02:45.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3195857.pt


Epoch 134/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:46.17 [info     ] DQN_20230110005015: epoch=134 step=3219886 epoch=134 metrics={'time_sample_batch': 6.053353364044244e-05, 'time_algorithm_update': 0.001840507201802354, 'loss': 2.600613439510433e-11, 'time_step': 0.00216908331662311, 'td_error': 8.599025386496662e-13, 'value_scale': 7.31330008235208e-05, 'environment': -0.11852540306285224} step=3219886
2023-01-10 02:46.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3219886.pt


Epoch 135/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:47.10 [info     ] DQN_20230110005015: epoch=135 step=3243915 epoch=135 metrics={'time_sample_batch': 5.920981392441104e-05, 'time_algorithm_update': 0.0017635312398486412, 'loss': 2.4872095011957582e-11, 'time_step': 0.002091943193223177, 'td_error': 6.3689891883468e-13, 'value_scale': 6.290777036549255e-05, 'environment': -0.39253832240571424} step=3243915
2023-01-10 02:47.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3243915.pt


Epoch 136/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:48.03 [info     ] DQN_20230110005015: epoch=136 step=3267944 epoch=136 metrics={'time_sample_batch': 6.123006634063512e-05, 'time_algorithm_update': 0.0018311512991652675, 'loss': 2.4351329394067713e-11, 'time_step': 0.002162007100027774, 'td_error': 1.9391019152787356e-13, 'value_scale': -3.469639486879928e-05, 'environment': -0.09095460094786438} step=3267944
2023-01-10 02:48.03 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3267944.pt


Epoch 137/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:48.57 [info     ] DQN_20230110005015: epoch=137 step=3291973 epoch=137 metrics={'time_sample_batch': 5.977147527141683e-05, 'time_algorithm_update': 0.0018008903268620963, 'loss': 2.4747162189449177e-11, 'time_step': 0.002133106539433872, 'td_error': 4.637889516214019e-13, 'value_scale': -5.371495710210627e-05, 'environment': -0.1611242512459419} step=3291973
2023-01-10 02:48.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3291973.pt


Epoch 138/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:49.50 [info     ] DQN_20230110005015: epoch=138 step=3316002 epoch=138 metrics={'time_sample_batch': 5.962966835758273e-05, 'time_algorithm_update': 0.001814107997293721, 'loss': 2.5545138567021882e-11, 'time_step': 0.002139989006826407, 'td_error': 3.6374566198955546e-13, 'value_scale': -4.756587818506551e-05, 'environment': -0.47394184826633695} step=3316002
2023-01-10 02:49.50 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3316002.pt


Epoch 139/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:50.42 [info     ] DQN_20230110005015: epoch=139 step=3340031 epoch=139 metrics={'time_sample_batch': 5.8647338963709866e-05, 'time_algorithm_update': 0.0017539159631187377, 'loss': 2.5168372180863942e-11, 'time_step': 0.0020797194610637612, 'td_error': 6.845869013354253e-15, 'value_scale': -6.3942751316826175e-06, 'environment': -0.16892831132065506} step=3340031
2023-01-10 02:50.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3340031.pt


Epoch 140/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:51.35 [info     ] DQN_20230110005015: epoch=140 step=3364060 epoch=140 metrics={'time_sample_batch': 5.9504441303321595e-05, 'time_algorithm_update': 0.0017977873034490926, 'loss': 2.453258559547055e-11, 'time_step': 0.002124705710131863, 'td_error': 2.966215622287135e-13, 'value_scale': 4.29433089675782e-05, 'environment': -0.07021141834608984} step=3364060
2023-01-10 02:51.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3364060.pt


Epoch 141/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:52.29 [info     ] DQN_20230110005015: epoch=141 step=3388089 epoch=141 metrics={'time_sample_batch': 5.991859051850738e-05, 'time_algorithm_update': 0.0018073562240425957, 'loss': 2.5097381819050745e-11, 'time_step': 0.00213893823466094, 'td_error': 8.113299341281355e-15, 'value_scale': 6.864719464481257e-06, 'environment': -0.12851230470499056} step=3388089
2023-01-10 02:52.29 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3388089.pt


Epoch 142/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:53.24 [info     ] DQN_20230110005015: epoch=142 step=3412118 epoch=142 metrics={'time_sample_batch': 6.24007869934618e-05, 'time_algorithm_update': 0.0018584941764951243, 'loss': 2.5901521945572503e-11, 'time_step': 0.0021927210864832627, 'td_error': 1.1018251898602943e-12, 'value_scale': -8.281526333813498e-05, 'environment': 0.06738434468377205} step=3412118
2023-01-10 02:53.24 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3412118.pt


Epoch 143/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:54.16 [info     ] DQN_20230110005015: epoch=143 step=3436147 epoch=143 metrics={'time_sample_batch': 5.950000611646994e-05, 'time_algorithm_update': 0.0017584507184268939, 'loss': 2.5716718113048168e-11, 'time_step': 0.0020869919966905225, 'td_error': 1.7139644755422895e-15, 'value_scale': -1.5332359562255632e-06, 'environment': -0.009573219763848506} step=3436147
2023-01-10 02:54.16 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3436147.pt


Epoch 144/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:55.08 [info     ] DQN_20230110005015: epoch=144 step=3460176 epoch=144 metrics={'time_sample_batch': 5.887207494176348e-05, 'time_algorithm_update': 0.001769333902957749, 'loss': 2.546129945765689e-11, 'time_step': 0.0020959889567782903, 'td_error': 7.622325274761144e-13, 'value_scale': 6.882863463918158e-05, 'environment': -0.29251522751499986} step=3460176
2023-01-10 02:55.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3460176.pt


Epoch 145/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:56.01 [info     ] DQN_20230110005015: epoch=145 step=3484205 epoch=145 metrics={'time_sample_batch': 6.024764764769812e-05, 'time_algorithm_update': 0.0017938108062013872, 'loss': 2.6140232376445865e-11, 'time_step': 0.0021288617084205827, 'td_error': 1.1999278152136035e-14, 'value_scale': 8.43609145876978e-06, 'environment': 0.1685917667702354} step=3484205
2023-01-10 02:56.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3484205.pt


Epoch 146/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:56.55 [info     ] DQN_20230110005015: epoch=146 step=3508234 epoch=146 metrics={'time_sample_batch': 5.996156321258907e-05, 'time_algorithm_update': 0.0018195226460470779, 'loss': 2.4291680938847147e-11, 'time_step': 0.0021528384575281623, 'td_error': 6.886926298335666e-13, 'value_scale': -6.546499682108128e-05, 'environment': -0.06319171426466631} step=3508234
2023-01-10 02:56.55 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3508234.pt


Epoch 147/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:57.48 [info     ] DQN_20230110005015: epoch=147 step=3532263 epoch=147 metrics={'time_sample_batch': 6.027447705540924e-05, 'time_algorithm_update': 0.0017817402417811866, 'loss': 2.442893343316116e-11, 'time_step': 0.002113109849614445, 'td_error': 4.079983001222091e-14, 'value_scale': -1.585203225557388e-05, 'environment': -0.3176452749998602} step=3532263
2023-01-10 02:57.48 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3532263.pt


Epoch 148/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:58.42 [info     ] DQN_20230110005015: epoch=148 step=3556292 epoch=148 metrics={'time_sample_batch': 6.080335572376209e-05, 'time_algorithm_update': 0.0018153747442068394, 'loss': 2.566049001537997e-11, 'time_step': 0.0021477368218388083, 'td_error': 3.5101584926496667e-15, 'value_scale': -4.22490213826186e-06, 'environment': 0.046313524377259614} step=3556292
2023-01-10 02:58.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3556292.pt


Epoch 149/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 02:59.35 [info     ] DQN_20230110005015: epoch=149 step=3580321 epoch=149 metrics={'time_sample_batch': 6.0540121926951384e-05, 'time_algorithm_update': 0.0018018262008983865, 'loss': 2.5447446245210628e-11, 'time_step': 0.002134485098697396, 'td_error': 4.2584359503044836e-13, 'value_scale': 5.145675569012954e-05, 'environment': 0.20019276607408787} step=3580321
2023-01-10 02:59.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3580321.pt


Epoch 150/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:00.28 [info     ] DQN_20230110005015: epoch=150 step=3604350 epoch=150 metrics={'time_sample_batch': 5.961890285929628e-05, 'time_algorithm_update': 0.001790022501770271, 'loss': 2.614269189232903e-11, 'time_step': 0.0021197737128979964, 'td_error': 1.9383522500751216e-13, 'value_scale': -3.468920362724229e-05, 'environment': 0.26079541719834964} step=3604350
2023-01-10 03:00.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3604350.pt


Epoch 151/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:01.21 [info     ] DQN_20230110005015: epoch=151 step=3628379 epoch=151 metrics={'time_sample_batch': 5.9438231008330346e-05, 'time_algorithm_update': 0.001796732274694902, 'loss': 2.4545378082184885e-11, 'time_step': 0.002128905881690971, 'td_error': 1.7971684037075112e-13, 'value_scale': 3.340545987894717e-05, 'environment': -0.2781690782821376} step=3628379
2023-01-10 03:01.21 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3628379.pt


Epoch 152/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:02.14 [info     ] DQN_20230110005015: epoch=152 step=3652408 epoch=152 metrics={'time_sample_batch': 5.954957701917881e-05, 'time_algorithm_update': 0.001788389509628812, 'loss': 2.3866295339376176e-11, 'time_step': 0.0021151885631513177, 'td_error': 2.4256693296734382e-14, 'value_scale': -1.2139806087610839e-05, 'environment': -0.33798315565750064} step=3652408
2023-01-10 03:02.14 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3652408.pt


Epoch 153/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:03.06 [info     ] DQN_20230110005015: epoch=153 step=3676437 epoch=153 metrics={'time_sample_batch': 5.9708479742734164e-05, 'time_algorithm_update': 0.0017653798098530215, 'loss': 2.6096146411743245e-11, 'time_step': 0.002096525802907587, 'td_error': 9.346145166549597e-15, 'value_scale': 7.436952638541154e-06, 'environment': 0.03917059736730862} step=3676437
2023-01-10 03:03.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3676437.pt


Epoch 154/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:04.00 [info     ] DQN_20230110005015: epoch=154 step=3700466 epoch=154 metrics={'time_sample_batch': 6.076087913559222e-05, 'time_algorithm_update': 0.0018172066748189373, 'loss': 2.5120229822523213e-11, 'time_step': 0.002152957354270989, 'td_error': 2.0589665769526503e-14, 'value_scale': -1.112759887015503e-05, 'environment': -0.36059299815319845} step=3700466
2023-01-10 03:04.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3700466.pt


Epoch 155/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:04.53 [info     ] DQN_20230110005015: epoch=155 step=3724495 epoch=155 metrics={'time_sample_batch': 5.960541870061306e-05, 'time_algorithm_update': 0.0018006719807281868, 'loss': 2.4452502263706895e-11, 'time_step': 0.002134384547951189, 'td_error': 1.3077311508292627e-14, 'value_scale': 8.797984782565557e-06, 'environment': 0.07304120214308166} step=3724495
2023-01-10 03:04.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3724495.pt


Epoch 156/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:05.47 [info     ] DQN_20230110005015: epoch=156 step=3748524 epoch=156 metrics={'time_sample_batch': 6.11079548315002e-05, 'time_algorithm_update': 0.001827319158815783, 'loss': 2.4390967674591835e-11, 'time_step': 0.002162521383260201, 'td_error': 5.606657414322036e-13, 'value_scale': 5.9060959715432566e-05, 'environment': -0.006360630737912487} step=3748524
2023-01-10 03:05.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3748524.pt


Epoch 157/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:06.41 [info     ] DQN_20230110005015: epoch=157 step=3772553 epoch=157 metrics={'time_sample_batch': 6.153853507448539e-05, 'time_algorithm_update': 0.0018106968424988872, 'loss': 2.52717590906289e-11, 'time_step': 0.002148879879625992, 'td_error': 1.0669263966500817e-14, 'value_scale': -7.961318833928592e-06, 'environment': -0.2000755265067387} step=3772553
2023-01-10 03:06.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3772553.pt


Epoch 158/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:07.34 [info     ] DQN_20230110005015: epoch=158 step=3796582 epoch=158 metrics={'time_sample_batch': 5.948574803256429e-05, 'time_algorithm_update': 0.0018070881581742039, 'loss': 2.5750723693354896e-11, 'time_step': 0.0021354971646796233, 'td_error': 2.8830666924650755e-13, 'value_scale': 4.233508023040558e-05, 'environment': -0.1257115961711839} step=3796582
2023-01-10 03:07.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3796582.pt


Epoch 159/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:08.27 [info     ] DQN_20230110005015: epoch=159 step=3820611 epoch=159 metrics={'time_sample_batch': 6.040940794130554e-05, 'time_algorithm_update': 0.001801002893293488, 'loss': 2.572099380925117e-11, 'time_step': 0.0021364930576091255, 'td_error': 8.82634832876864e-14, 'value_scale': 2.335626740866231e-05, 'environment': -0.43239555685298897} step=3820611
2023-01-10 03:08.27 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3820611.pt


Epoch 160/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:09.20 [info     ] DQN_20230110005015: epoch=160 step=3844640 epoch=160 metrics={'time_sample_batch': 6.077894731290064e-05, 'time_algorithm_update': 0.0017994411221167296, 'loss': 2.5623135701539795e-11, 'time_step': 0.0021315168573385153, 'td_error': 5.881676713881237e-15, 'value_scale': 5.6016787841393075e-06, 'environment': -0.49147020370614786} step=3844640
2023-01-10 03:09.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3844640.pt


Epoch 161/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:10.13 [info     ] DQN_20230110005015: epoch=161 step=3868669 epoch=161 metrics={'time_sample_batch': 5.951705231560001e-05, 'time_algorithm_update': 0.001771351843520424, 'loss': 2.6327600050950104e-11, 'time_step': 0.0021055084751450946, 'td_error': 4.463568829921595e-14, 'value_scale': 1.6562133605621028e-05, 'environment': 0.06736669365568398} step=3868669
2023-01-10 03:10.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3868669.pt


Epoch 162/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:11.06 [info     ] DQN_20230110005015: epoch=162 step=3892698 epoch=162 metrics={'time_sample_batch': 5.935013252050968e-05, 'time_algorithm_update': 0.001782190686104881, 'loss': 2.4709005662492247e-11, 'time_step': 0.002111365064966817, 'td_error': 3.9355209889644824e-13, 'value_scale': 4.9469138737855185e-05, 'environment': 0.27339048256314846} step=3892698
2023-01-10 03:11.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3892698.pt


Epoch 163/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:11.59 [info     ] DQN_20230110005015: epoch=163 step=3916727 epoch=163 metrics={'time_sample_batch': 6.013382110729056e-05, 'time_algorithm_update': 0.0018134778237202933, 'loss': 2.577737514333072e-11, 'time_step': 0.0021485118980269597, 'td_error': 1.6868743790191326e-14, 'value_scale': -1.0077244396099143e-05, 'environment': 0.106339312148316} step=3916727
2023-01-10 03:11.59 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3916727.pt


Epoch 164/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:12.52 [info     ] DQN_20230110005015: epoch=164 step=3940756 epoch=164 metrics={'time_sample_batch': 5.9721596783042634e-05, 'time_algorithm_update': 0.0017785878260636024, 'loss': 2.4046833958020812e-11, 'time_step': 0.002106629982091469, 'td_error': 5.667046364834035e-13, 'value_scale': 5.9386048955421006e-05, 'environment': -0.13925872219318491} step=3940756
2023-01-10 03:12.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3940756.pt


Epoch 165/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:13.46 [info     ] DQN_20230110005015: epoch=165 step=3964785 epoch=165 metrics={'time_sample_batch': 6.0195368006710726e-05, 'time_algorithm_update': 0.001813252978598939, 'loss': 2.561184257031834e-11, 'time_step': 0.0021472607586058277, 'td_error': 8.354681109157498e-16, 'value_scale': 1.8401557222625445e-06, 'environment': -0.29062906238279995} step=3964785
2023-01-10 03:13.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3964785.pt


Epoch 166/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:14.38 [info     ] DQN_20230110005015: epoch=166 step=3988814 epoch=166 metrics={'time_sample_batch': 6.054739483961864e-05, 'time_algorithm_update': 0.0017757080503109169, 'loss': 2.5402972064130228e-11, 'time_step': 0.00210404440706661, 'td_error': 2.2725726989824268e-13, 'value_scale': 3.7594349217381046e-05, 'environment': -0.21729025088041626} step=3988814
2023-01-10 03:14.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_3988814.pt


Epoch 167/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:15.32 [info     ] DQN_20230110005015: epoch=167 step=4012843 epoch=167 metrics={'time_sample_batch': 6.127458688516167e-05, 'time_algorithm_update': 0.0018242935775356142, 'loss': 2.694551915071086e-11, 'time_step': 0.0021636434954557878, 'td_error': 3.764896117305724e-14, 'value_scale': 1.5188419137493555e-05, 'environment': -0.006273814282648874} step=4012843
2023-01-10 03:15.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4012843.pt


Epoch 168/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:16.26 [info     ] DQN_20230110005015: epoch=168 step=4036872 epoch=168 metrics={'time_sample_batch': 6.0406123720169306e-05, 'time_algorithm_update': 0.0018084872363782404, 'loss': 2.5760250965590472e-11, 'time_step': 0.002141099986405333, 'td_error': 1.132572506667033e-12, 'value_scale': -8.396429273153416e-05, 'environment': 0.10846291848946163} step=4036872
2023-01-10 03:16.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4036872.pt


Epoch 169/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:17.19 [info     ] DQN_20230110005015: epoch=169 step=4060901 epoch=169 metrics={'time_sample_batch': 6.103157436531667e-05, 'time_algorithm_update': 0.001823250534700253, 'loss': 2.6093840204459073e-11, 'time_step': 0.002160830783300262, 'td_error': 4.376772218774587e-13, 'value_scale': -5.2186194915872374e-05, 'environment': 0.023264994633063356} step=4060901
2023-01-10 03:17.19 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4060901.pt


Epoch 170/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:18.12 [info     ] DQN_20230110005015: epoch=170 step=4084930 epoch=170 metrics={'time_sample_batch': 5.999257975419594e-05, 'time_algorithm_update': 0.0017756739083020655, 'loss': 2.6435294992845695e-11, 'time_step': 0.00210663976530005, 'td_error': 6.645054020574279e-14, 'value_scale': -2.027460487551358e-05, 'environment': -0.15825575118964522} step=4084930
2023-01-10 03:18.12 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4084930.pt


Epoch 171/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:19.05 [info     ] DQN_20230110005015: epoch=171 step=4108959 epoch=171 metrics={'time_sample_batch': 6.028225599610655e-05, 'time_algorithm_update': 0.001789857903750848, 'loss': 2.548395335639174e-11, 'time_step': 0.0021230352719161807, 'td_error': 5.638075500883702e-16, 'value_scale': -6.39231531549187e-07, 'environment': -0.3828254269583263} step=4108959
2023-01-10 03:19.05 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4108959.pt


Epoch 172/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:19.57 [info     ] DQN_20230110005015: epoch=172 step=4132988 epoch=172 metrics={'time_sample_batch': 5.8636613153896355e-05, 'time_algorithm_update': 0.0017652837339821388, 'loss': 2.436618720786203e-11, 'time_step': 0.0020923052017070305, 'td_error': 2.1342565468839064e-13, 'value_scale': -3.641755359975274e-05, 'environment': -0.05461549060423274} step=4132988
2023-01-10 03:19.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4132988.pt


Epoch 173/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:20.51 [info     ] DQN_20230110005015: epoch=173 step=4157017 epoch=173 metrics={'time_sample_batch': 6.020234325583089e-05, 'time_algorithm_update': 0.0017955422852884602, 'loss': 2.4603851743423093e-11, 'time_step': 0.0021304813354687774, 'td_error': 6.950627547976149e-13, 'value_scale': 6.577529665095516e-05, 'environment': -0.26091014422502823} step=4157017
2023-01-10 03:20.51 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4157017.pt


Epoch 174/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:21.44 [info     ] DQN_20230110005015: epoch=174 step=4181046 epoch=174 metrics={'time_sample_batch': 5.9967595860476774e-05, 'time_algorithm_update': 0.0018085057014402782, 'loss': 2.5761491752168515e-11, 'time_step': 0.0021441905873263046, 'td_error': 5.541863694305895e-14, 'value_scale': -1.849222254247647e-05, 'environment': -0.18437972083682902} step=4181046
2023-01-10 03:21.44 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4181046.pt


Epoch 175/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:22.37 [info     ] DQN_20230110005015: epoch=175 step=4205075 epoch=175 metrics={'time_sample_batch': 5.972961385457761e-05, 'time_algorithm_update': 0.0017849788707841267, 'loss': 2.436214437441935e-11, 'time_step': 0.0021210733317888314, 'td_error': 1.1414591240297209e-14, 'value_scale': 8.300911666017906e-06, 'environment': -0.26318000196561814} step=4205075
2023-01-10 03:22.37 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4205075.pt


Epoch 176/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:23.31 [info     ] DQN_20230110005015: epoch=176 step=4229104 epoch=176 metrics={'time_sample_batch': 6.0459901601010394e-05, 'time_algorithm_update': 0.001806273492734289, 'loss': 2.5254895328583118e-11, 'time_step': 0.002142097744693063, 'td_error': 2.8936827482773124e-15, 'value_scale': 4.049756116630633e-06, 'environment': -0.18581089699055456} step=4229104
2023-01-10 03:23.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4229104.pt


Epoch 177/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:24.24 [info     ] DQN_20230110005015: epoch=177 step=4253133 epoch=177 metrics={'time_sample_batch': 5.939747094661537e-05, 'time_algorithm_update': 0.001800289016730618, 'loss': 2.509527055325759e-11, 'time_step': 0.002137094308207894, 'td_error': 5.904172821256143e-13, 'value_scale': 6.06112092264664e-05, 'environment': -0.10613881583781493} step=4253133
2023-01-10 03:24.24 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4253133.pt


Epoch 178/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:25.18 [info     ] DQN_20230110005015: epoch=178 step=4277162 epoch=178 metrics={'time_sample_batch': 6.072093268757262e-05, 'time_algorithm_update': 0.0018132357736459171, 'loss': 2.528563164376291e-11, 'time_step': 0.002149787912198511, 'td_error': 1.547527926932787e-14, 'value_scale': 9.661037874012923e-06, 'environment': 0.01044030159196645} step=4277162
2023-01-10 03:25.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4277162.pt


Epoch 179/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:26.11 [info     ] DQN_20230110005015: epoch=179 step=4301191 epoch=179 metrics={'time_sample_batch': 5.9459166677809075e-05, 'time_algorithm_update': 0.0017908565451071013, 'loss': 2.5900382456010677e-11, 'time_step': 0.0021261364101267193, 'td_error': 4.4496416774433633e-13, 'value_scale': -5.2618791035232214e-05, 'environment': -0.34942545237478734} step=4301191
2023-01-10 03:26.11 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4301191.pt


Epoch 180/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:27.04 [info     ] DQN_20230110005015: epoch=180 step=4325220 epoch=180 metrics={'time_sample_batch': 5.954890231513874e-05, 'time_algorithm_update': 0.001781352167892726, 'loss': 2.430333192139313e-11, 'time_step': 0.00211393081559944, 'td_error': 1.7076273661996598e-14, 'value_scale': 1.0183613871634266e-05, 'environment': -0.04834566487180039} step=4325220
2023-01-10 03:27.04 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4325220.pt


Epoch 181/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:27.58 [info     ] DQN_20230110005015: epoch=181 step=4349249 epoch=181 metrics={'time_sample_batch': 6.0635433794729885e-05, 'time_algorithm_update': 0.0018222285962103805, 'loss': 2.519508597871068e-11, 'time_step': 0.002161918386368623, 'td_error': 1.4152102188138694e-13, 'value_scale': -2.9644235065287367e-05, 'environment': -0.08924372312094067} step=4349249
2023-01-10 03:27.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4349249.pt


Epoch 182/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:28.52 [info     ] DQN_20230110005015: epoch=182 step=4373278 epoch=182 metrics={'time_sample_batch': 6.114424001789057e-05, 'time_algorithm_update': 0.0018295601882848843, 'loss': 2.4648858270226122e-11, 'time_step': 0.0021655178530454646, 'td_error': 2.0993258557539414e-13, 'value_scale': -3.610832117764121e-05, 'environment': -0.517006929368574} step=4373278
2023-01-10 03:28.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4373278.pt


Epoch 183/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:29.45 [info     ] DQN_20230110005015: epoch=183 step=4397307 epoch=183 metrics={'time_sample_batch': 6.0873346345801395e-05, 'time_algorithm_update': 0.001801055589663441, 'loss': 2.4275856151627927e-11, 'time_step': 0.0021399280552540814, 'td_error': 2.3632326765735284e-13, 'value_scale': -3.832541511310057e-05, 'environment': -0.16176724108140286} step=4397307
2023-01-10 03:29.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4397307.pt


Epoch 184/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:30.38 [info     ] DQN_20230110005015: epoch=184 step=4421336 epoch=184 metrics={'time_sample_batch': 5.943412325138049e-05, 'time_algorithm_update': 0.0017749386694186327, 'loss': 2.511726117081077e-11, 'time_step': 0.0021045629964983516, 'td_error': 1.063585127762169e-12, 'value_scale': 8.13676662887207e-05, 'environment': -0.4448928335169188} step=4421336
2023-01-10 03:30.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4421336.pt


Epoch 185/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:31.31 [info     ] DQN_20230110005015: epoch=185 step=4445365 epoch=185 metrics={'time_sample_batch': 6.0894877342374304e-05, 'time_algorithm_update': 0.0018136231629082196, 'loss': 2.7493017520562854e-11, 'time_step': 0.002153999514037827, 'td_error': 2.264370392283717e-12, 'value_scale': 0.00011873861169529626, 'environment': -0.554662087074642} step=4445365
2023-01-10 03:31.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4445365.pt


Epoch 186/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:32.25 [info     ] DQN_20230110005015: epoch=186 step=4469394 epoch=186 metrics={'time_sample_batch': 6.0610777330912526e-05, 'time_algorithm_update': 0.0018212880389342828, 'loss': 2.6646968721236005e-11, 'time_step': 0.0021577584192326088, 'td_error': 2.1487296487771895e-13, 'value_scale': -3.6543750949725234e-05, 'environment': -0.0461424029066291} step=4469394
2023-01-10 03:32.25 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4469394.pt


Epoch 187/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:33.20 [info     ] DQN_20230110005015: epoch=187 step=4493423 epoch=187 metrics={'time_sample_batch': 6.051735066559893e-05, 'time_algorithm_update': 0.0018263578841568077, 'loss': 2.504689088613027e-11, 'time_step': 0.0021688405223898666, 'td_error': 2.549184581091472e-14, 'value_scale': 1.2480879982213651e-05, 'environment': -0.20999466900354796} step=4493423
2023-01-10 03:33.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4493423.pt


Epoch 188/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:34.12 [info     ] DQN_20230110005015: epoch=188 step=4517452 epoch=188 metrics={'time_sample_batch': 5.996174181071732e-05, 'time_algorithm_update': 0.0017656062325911751, 'loss': 2.440705808466369e-11, 'time_step': 0.0020961333831313387, 'td_error': 3.561309371064747e-14, 'value_scale': 1.4779727783830653e-05, 'environment': -0.11714373339895204} step=4517452
2023-01-10 03:34.12 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4517452.pt


Epoch 189/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:35.04 [info     ] DQN_20230110005015: epoch=189 step=4541481 epoch=189 metrics={'time_sample_batch': 5.865386771750939e-05, 'time_algorithm_update': 0.0017558844319998863, 'loss': 2.4165861158540652e-11, 'time_step': 0.0020895553461705322, 'td_error': 6.9944394610750354e-15, 'value_scale': 6.378116360958512e-06, 'environment': -0.781162504683801} step=4541481
2023-01-10 03:35.04 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4541481.pt


Epoch 190/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:35.57 [info     ] DQN_20230110005015: epoch=190 step=4565510 epoch=190 metrics={'time_sample_batch': 5.9936609085224616e-05, 'time_algorithm_update': 0.0017907073660594177, 'loss': 2.478269474062159e-11, 'time_step': 0.002123608879493543, 'td_error': 6.287407589183071e-15, 'value_scale': 6.080668314301785e-06, 'environment': -0.8636019419215033} step=4565510
2023-01-10 03:35.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4565510.pt


Epoch 191/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:36.50 [info     ] DQN_20230110005015: epoch=191 step=4589539 epoch=191 metrics={'time_sample_batch': 6.065586343617856e-05, 'time_algorithm_update': 0.001793877998786084, 'loss': 2.592354548736725e-11, 'time_step': 0.0021300654399607818, 'td_error': 1.1204676476737576e-13, 'value_scale': -2.6348039678717827e-05, 'environment': -0.00287409563540989} step=4589539
2023-01-10 03:36.50 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4589539.pt


Epoch 192/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:37.44 [info     ] DQN_20230110005015: epoch=192 step=4613568 epoch=192 metrics={'time_sample_batch': 6.001113411529794e-05, 'time_algorithm_update': 0.0017971281176019417, 'loss': 2.5273271647614476e-11, 'time_step': 0.0021338172012304335, 'td_error': 3.685326759535939e-13, 'value_scale': -4.788713088014927e-05, 'environment': -0.5991697588966913} step=4613568
2023-01-10 03:37.44 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4613568.pt


Epoch 193/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:38.37 [info     ] DQN_20230110005015: epoch=193 step=4637597 epoch=193 metrics={'time_sample_batch': 6.006714447274221e-05, 'time_algorithm_update': 0.001796200449157433, 'loss': 2.6230917974274395e-11, 'time_step': 0.0021338464714792307, 'td_error': 1.7040908097987262e-13, 'value_scale': -3.252559449013635e-05, 'environment': -0.21664424998525816} step=4637597
2023-01-10 03:38.37 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4637597.pt


Epoch 194/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:39.30 [info     ] DQN_20230110005015: epoch=194 step=4661626 epoch=194 metrics={'time_sample_batch': 6.05975908357764e-05, 'time_algorithm_update': 0.0018000057997876791, 'loss': 2.5654758365107574e-11, 'time_step': 0.0021366964411887344, 'td_error': 7.767692005594135e-15, 'value_scale': -6.7465786312758846e-06, 'environment': -0.4331785416501986} step=4661626
2023-01-10 03:39.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4661626.pt


Epoch 195/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:40.24 [info     ] DQN_20230110005015: epoch=195 step=4685655 epoch=195 metrics={'time_sample_batch': 6.05566620980514e-05, 'time_algorithm_update': 0.001803939205275884, 'loss': 2.4998534887101846e-11, 'time_step': 0.002143702200822474, 'td_error': 5.036437423625148e-15, 'value_scale': 4.992061254756894e-06, 'environment': -0.024917813250879644} step=4685655
2023-01-10 03:40.24 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4685655.pt


Epoch 196/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:41.18 [info     ] DQN_20230110005015: epoch=196 step=4709684 epoch=196 metrics={'time_sample_batch': 6.123689275798173e-05, 'time_algorithm_update': 0.0018174579723075096, 'loss': 2.5305469499547845e-11, 'time_step': 0.0021558283787153896, 'td_error': 1.518047933813284e-13, 'value_scale': -3.0690438115274724e-05, 'environment': -0.16430980741764173} step=4709684
2023-01-10 03:41.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4709684.pt


Epoch 197/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:42.11 [info     ] DQN_20230110005015: epoch=197 step=4733713 epoch=197 metrics={'time_sample_batch': 6.044848124292034e-05, 'time_algorithm_update': 0.001790692056230979, 'loss': 2.5386320271878245e-11, 'time_step': 0.002129058255660727, 'td_error': 3.558222715592384e-14, 'value_scale': 1.4755473455706212e-05, 'environment': 0.053696253407453855} step=4733713
2023-01-10 03:42.11 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4733713.pt


Epoch 198/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:43.05 [info     ] DQN_20230110005015: epoch=198 step=4757742 epoch=198 metrics={'time_sample_batch': 5.97240971568382e-05, 'time_algorithm_update': 0.0018299511098212788, 'loss': 2.5158997040123946e-11, 'time_step': 0.0021657832498640506, 'td_error': 4.921349252407987e-13, 'value_scale': 5.534396056147664e-05, 'environment': -0.2322495677402257} step=4757742
2023-01-10 03:43.05 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4757742.pt


Epoch 199/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:43.58 [info     ] DQN_20230110005015: epoch=199 step=4781771 epoch=199 metrics={'time_sample_batch': 6.016738763328418e-05, 'time_algorithm_update': 0.0017887939351681262, 'loss': 2.4285374878911386e-11, 'time_step': 0.0021248483703478656, 'td_error': 3.8393579221845787e-14, 'value_scale': 1.5343877560187328e-05, 'environment': -0.035431274419486035} step=4781771
2023-01-10 03:43.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4781771.pt


Epoch 200/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:44.50 [info     ] DQN_20230110005015: epoch=200 step=4805800 epoch=200 metrics={'time_sample_batch': 5.9899420986074726e-05, 'time_algorithm_update': 0.0017788703583803828, 'loss': 2.4947353233410444e-11, 'time_step': 0.0021095698461921935, 'td_error': 8.047646896184191e-14, 'value_scale': -2.2349495528426705e-05, 'environment': -0.5356650781202942} step=4805800
2023-01-10 03:44.50 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4805800.pt


Epoch 201/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:45.43 [info     ] DQN_20230110005015: epoch=201 step=4829829 epoch=201 metrics={'time_sample_batch': 5.980888165716792e-05, 'time_algorithm_update': 0.0017908859641876723, 'loss': 2.641603884384507e-11, 'time_step': 0.002128470181634976, 'td_error': 3.0843320242115726e-13, 'value_scale': -4.380756359452379e-05, 'environment': 0.10128318612271278} step=4829829
2023-01-10 03:45.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4829829.pt


Epoch 202/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:46.36 [info     ] DQN_20230110005015: epoch=202 step=4853858 epoch=202 metrics={'time_sample_batch': 5.950950158362214e-05, 'time_algorithm_update': 0.0017908548385027648, 'loss': 2.522855738724608e-11, 'time_step': 0.0021237744994911443, 'td_error': 4.7882196866929883e-14, 'value_scale': 1.7225053144009347e-05, 'environment': -0.45438720804559} step=4853858
2023-01-10 03:46.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4853858.pt


Epoch 203/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:47.30 [info     ] DQN_20230110005015: epoch=203 step=4877887 epoch=203 metrics={'time_sample_batch': 6.0396380200061195e-05, 'time_algorithm_update': 0.0018089419174464217, 'loss': 2.5724301517854675e-11, 'time_step': 0.002150089743035261, 'td_error': 3.2481170867908537e-13, 'value_scale': 4.496140631026646e-05, 'environment': 0.203744269830556} step=4877887
2023-01-10 03:47.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4877887.pt


Epoch 204/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:48.25 [info     ] DQN_20230110005015: epoch=204 step=4901916 epoch=204 metrics={'time_sample_batch': 6.0678654541767484e-05, 'time_algorithm_update': 0.0018529342381644325, 'loss': 2.46411785852467e-11, 'time_step': 0.002190895833456738, 'td_error': 5.860491317785779e-16, 'value_scale': -1.4220912632105538e-06, 'environment': -0.32372082862209145} step=4901916
2023-01-10 03:48.25 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4901916.pt


Epoch 205/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:49.18 [info     ] DQN_20230110005015: epoch=205 step=4925945 epoch=205 metrics={'time_sample_batch': 6.013681758699795e-05, 'time_algorithm_update': 0.001785874917437816, 'loss': 2.4891496696056338e-11, 'time_step': 0.002122545496315797, 'td_error': 1.2667727808961656e-13, 'value_scale': 2.8058343474077472e-05, 'environment': -0.06398388626255391} step=4925945
2023-01-10 03:49.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4925945.pt


Epoch 206/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:50.11 [info     ] DQN_20230110005015: epoch=206 step=4949974 epoch=206 metrics={'time_sample_batch': 6.022405285053205e-05, 'time_algorithm_update': 0.0018093811596986272, 'loss': 2.529185185186867e-11, 'time_step': 0.0021458657782366226, 'td_error': 5.185847565771569e-14, 'value_scale': -1.791465985293295e-05, 'environment': -0.4600396351109812} step=4949974
2023-01-10 03:50.11 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4949974.pt


Epoch 207/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:51.06 [info     ] DQN_20230110005015: epoch=207 step=4974003 epoch=207 metrics={'time_sample_batch': 6.0731866861869096e-05, 'time_algorithm_update': 0.0018243439818962548, 'loss': 2.5758490102413772e-11, 'time_step': 0.002163994272101798, 'td_error': 1.6457517771444423e-13, 'value_scale': 3.1994441783370824e-05, 'environment': -0.21834167809513289} step=4974003
2023-01-10 03:51.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4974003.pt


Epoch 208/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:51.58 [info     ] DQN_20230110005015: epoch=208 step=4998032 epoch=208 metrics={'time_sample_batch': 5.917061163525917e-05, 'time_algorithm_update': 0.0017756611087695407, 'loss': 2.451964721141705e-11, 'time_step': 0.0021077578987262237, 'td_error': 4.7638570622963064e-14, 'value_scale': -1.7185144182280416e-05, 'environment': -0.18289552420858485} step=4998032
2023-01-10 03:51.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_4998032.pt


Epoch 209/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:52.52 [info     ] DQN_20230110005015: epoch=209 step=5022061 epoch=209 metrics={'time_sample_batch': 6.143386664921e-05, 'time_algorithm_update': 0.0018151047931359826, 'loss': 2.4355648576576926e-11, 'time_step': 0.0021565557989696523, 'td_error': 2.0021985538250748e-13, 'value_scale': -3.528047007770879e-05, 'environment': -0.3592542828799493} step=5022061
2023-01-10 03:52.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5022061.pt


Epoch 210/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:53.45 [info     ] DQN_20230110005015: epoch=210 step=5046090 epoch=210 metrics={'time_sample_batch': 6.040951708460614e-05, 'time_algorithm_update': 0.0018082356511482391, 'loss': 2.5352443608963526e-11, 'time_step': 0.0021466796102186055, 'td_error': 1.114369470448633e-14, 'value_scale': 8.231200821641151e-06, 'environment': -0.0002716295579972661} step=5046090
2023-01-10 03:53.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5046090.pt


Epoch 211/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:54.40 [info     ] DQN_20230110005015: epoch=211 step=5070119 epoch=211 metrics={'time_sample_batch': 6.158521864078748e-05, 'time_algorithm_update': 0.0018277629255539043, 'loss': 2.573287465446921e-11, 'time_step': 0.0021703365892996635, 'td_error': 4.563238001691513e-13, 'value_scale': 5.328692904800766e-05, 'environment': -0.12955484513784565} step=5070119
2023-01-10 03:54.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5070119.pt


Epoch 212/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:55.33 [info     ] DQN_20230110005015: epoch=212 step=5094148 epoch=212 metrics={'time_sample_batch': 6.026927786545339e-05, 'time_algorithm_update': 0.0018063948898509109, 'loss': 2.483023924220619e-11, 'time_step': 0.002143572131774514, 'td_error': 2.378827665213118e-14, 'value_scale': -1.206095068361457e-05, 'environment': -0.17138941966979165} step=5094148
2023-01-10 03:55.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5094148.pt


Epoch 213/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:56.25 [info     ] DQN_20230110005015: epoch=213 step=5118177 epoch=213 metrics={'time_sample_batch': 5.89394758909431e-05, 'time_algorithm_update': 0.0017542669778955857, 'loss': 2.4293256499674608e-11, 'time_step': 0.002089630853490311, 'td_error': 7.980641517047838e-13, 'value_scale': -7.048246997481297e-05, 'environment': -0.28028180899998567} step=5118177
2023-01-10 03:56.25 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5118177.pt


Epoch 214/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:57.19 [info     ] DQN_20230110005015: epoch=214 step=5142206 epoch=214 metrics={'time_sample_batch': 6.0948178961540045e-05, 'time_algorithm_update': 0.0018002539519647706, 'loss': 2.6127047507506786e-11, 'time_step': 0.0021388516641793276, 'td_error': 8.950543084541102e-14, 'value_scale': -2.3559880171740297e-05, 'environment': -0.5061535485164923} step=5142206
2023-01-10 03:57.19 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5142206.pt


Epoch 215/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:58.12 [info     ] DQN_20230110005015: epoch=215 step=5166235 epoch=215 metrics={'time_sample_batch': 6.0360224001207894e-05, 'time_algorithm_update': 0.001805079544402671, 'loss': 2.585769813395301e-11, 'time_step': 0.002143801471615429, 'td_error': 1.6987173285726234e-13, 'value_scale': 3.247757155060995e-05, 'environment': -0.010947414798011363} step=5166235
2023-01-10 03:58.12 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5166235.pt


Epoch 216/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:59.06 [info     ] DQN_20230110005015: epoch=216 step=5190264 epoch=216 metrics={'time_sample_batch': 6.136810284953939e-05, 'time_algorithm_update': 0.0018200410171922185, 'loss': 2.5004031868817225e-11, 'time_step': 0.00216236065486689, 'td_error': 2.2885142292872867e-14, 'value_scale': -1.1790268454593757e-05, 'environment': -0.11757984403766501} step=5190264
2023-01-10 03:59.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5190264.pt


Epoch 217/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 03:59.59 [info     ] DQN_20230110005015: epoch=217 step=5214293 epoch=217 metrics={'time_sample_batch': 6.034057820709989e-05, 'time_algorithm_update': 0.0017952780989682988, 'loss': 2.619130500852698e-11, 'time_step': 0.002137474245959401, 'td_error': 1.1325292622535981e-13, 'value_scale': 2.6512914372650027e-05, 'environment': -0.3277829689152122} step=5214293
2023-01-10 03:59.59 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5214293.pt


Epoch 218/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:00.53 [info     ] DQN_20230110005015: epoch=218 step=5238322 epoch=218 metrics={'time_sample_batch': 6.0103757089034385e-05, 'time_algorithm_update': 0.0018251393686604363, 'loss': 2.436336726574466e-11, 'time_step': 0.002159845030775598, 'td_error': 2.359315354831246e-13, 'value_scale': -3.8303790341846746e-05, 'environment': -0.17954029002527272} step=5238322
2023-01-10 04:00.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5238322.pt


Epoch 219/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:01.46 [info     ] DQN_20230110005015: epoch=219 step=5262351 epoch=219 metrics={'time_sample_batch': 5.884197123503436e-05, 'time_algorithm_update': 0.001770420047474729, 'loss': 2.4410824732831778e-11, 'time_step': 0.0021036273605928987, 'td_error': 4.896892707370306e-14, 'value_scale': -1.741680483715383e-05, 'environment': -0.3709682651529765} step=5262351
2023-01-10 04:01.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5262351.pt


Epoch 220/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:02.39 [info     ] DQN_20230110005015: epoch=220 step=5286380 epoch=220 metrics={'time_sample_batch': 5.9110056947662645e-05, 'time_algorithm_update': 0.0017686236876565085, 'loss': 2.5026542091611623e-11, 'time_step': 0.002102940730166706, 'td_error': 2.3491565697143965e-14, 'value_scale': 1.1995359849254718e-05, 'environment': -0.44294018076421615} step=5286380
2023-01-10 04:02.39 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5286380.pt


Epoch 221/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:03.33 [info     ] DQN_20230110005015: epoch=221 step=5310409 epoch=221 metrics={'time_sample_batch': 6.0610797175148996e-05, 'time_algorithm_update': 0.0018264534738438966, 'loss': 2.554076682422813e-11, 'time_step': 0.0021669245415141883, 'td_error': 5.037644304381457e-13, 'value_scale': 5.59925242554811e-05, 'environment': -0.41328151145229236} step=5310409
2023-01-10 04:03.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5310409.pt


Epoch 222/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:04.26 [info     ] DQN_20230110005015: epoch=222 step=5334438 epoch=222 metrics={'time_sample_batch': 6.02278331775801e-05, 'time_algorithm_update': 0.0018082858372222787, 'loss': 2.503643650094721e-11, 'time_step': 0.0021447426440628576, 'td_error': 2.900129966224119e-14, 'value_scale': -1.3372663429125712e-05, 'environment': 0.08377369179765189} step=5334438
2023-01-10 04:04.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5334438.pt


Epoch 223/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:05.18 [info     ] DQN_20230110005015: epoch=223 step=5358467 epoch=223 metrics={'time_sample_batch': 5.86637203809181e-05, 'time_algorithm_update': 0.0017573709637540582, 'loss': 2.5351215928984353e-11, 'time_step': 0.0020942460275667723, 'td_error': 2.0232461970235906e-12, 'value_scale': -0.00011223665839778574, 'environment': -0.15067482585464734} step=5358467
2023-01-10 04:05.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5358467.pt


Epoch 224/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:06.13 [info     ] DQN_20230110005015: epoch=224 step=5382496 epoch=224 metrics={'time_sample_batch': 6.117141669973997e-05, 'time_algorithm_update': 0.0018421765684292673, 'loss': 2.5796476045899453e-11, 'time_step': 0.0021847347635926963, 'td_error': 8.754535769289936e-14, 'value_scale': 2.3308145637464912e-05, 'environment': -0.3023416946852874} step=5382496
2023-01-10 04:06.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5382496.pt


Epoch 225/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:07.05 [info     ] DQN_20230110005015: epoch=225 step=5406525 epoch=225 metrics={'time_sample_batch': 5.796347680850503e-05, 'time_algorithm_update': 0.0017579318809421963, 'loss': 2.5472475219128582e-11, 'time_step': 0.002093079930621044, 'td_error': 5.844344009721643e-13, 'value_scale': -6.031275406627955e-05, 'environment': -0.17106308123921204} step=5406525
2023-01-10 04:07.05 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5406525.pt


Epoch 226/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:07.59 [info     ] DQN_20230110005015: epoch=226 step=5430554 epoch=226 metrics={'time_sample_batch': 6.0227793489107155e-05, 'time_algorithm_update': 0.0018358762515578964, 'loss': 2.5371681187763604e-11, 'time_step': 0.0021760872505870946, 'td_error': 4.0661776156932646e-14, 'value_scale': -1.5871577089220067e-05, 'environment': -0.2034329311630909} step=5430554
2023-01-10 04:07.59 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5430554.pt


Epoch 227/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:08.54 [info     ] DQN_20230110005015: epoch=227 step=5454583 epoch=227 metrics={'time_sample_batch': 6.0968033120131e-05, 'time_algorithm_update': 0.0018292840557343664, 'loss': 2.434499307313485e-11, 'time_step': 0.0021717830356961605, 'td_error': 2.123343952045439e-13, 'value_scale': -3.634383338407401e-05, 'environment': 0.15871147104529654} step=5454583
2023-01-10 04:08.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5454583.pt


Epoch 228/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:09.48 [info     ] DQN_20230110005015: epoch=228 step=5478612 epoch=228 metrics={'time_sample_batch': 6.0866906891066e-05, 'time_algorithm_update': 0.0018191336196352666, 'loss': 2.469881867948325e-11, 'time_step': 0.002157995399104566, 'td_error': 1.5441721748611276e-14, 'value_scale': -9.72243556657648e-06, 'environment': -0.3532075328867566} step=5478612
2023-01-10 04:09.48 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5478612.pt


Epoch 229/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:10.40 [info     ] DQN_20230110005015: epoch=229 step=5502641 epoch=229 metrics={'time_sample_batch': 5.965458279647424e-05, 'time_algorithm_update': 0.0017576206737037127, 'loss': 2.550369760029047e-11, 'time_step': 0.0020940704556845803, 'td_error': 1.4295584396282675e-13, 'value_scale': -2.9784805713960578e-05, 'environment': -0.08138604837294205} step=5502641
2023-01-10 04:10.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5502641.pt


Epoch 230/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:11.34 [info     ] DQN_20230110005015: epoch=230 step=5526670 epoch=230 metrics={'time_sample_batch': 6.065192435523872e-05, 'time_algorithm_update': 0.0018306018519458106, 'loss': 2.4676522729432548e-11, 'time_step': 0.0021700061331518016, 'td_error': 7.398774279230178e-14, 'value_scale': 2.137974438121552e-05, 'environment': -0.05113643418501832} step=5526670
2023-01-10 04:11.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5526670.pt


Epoch 231/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:12.27 [info     ] DQN_20230110005015: epoch=231 step=5550699 epoch=231 metrics={'time_sample_batch': 5.9953665206472927e-05, 'time_algorithm_update': 0.0017822696661660424, 'loss': 2.567733065005802e-11, 'time_step': 0.0021190321734694834, 'td_error': 2.518074669179091e-14, 'value_scale': -1.2395359169146148e-05, 'environment': -0.22965079846581454} step=5550699
2023-01-10 04:12.27 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5550699.pt


Epoch 232/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:13.20 [info     ] DQN_20230110005015: epoch=232 step=5574728 epoch=232 metrics={'time_sample_batch': 5.981687888446642e-05, 'time_algorithm_update': 0.0018058488459180091, 'loss': 2.641649987038171e-11, 'time_step': 0.0021462675644924856, 'td_error': 7.762913818255048e-15, 'value_scale': -6.729615290977651e-06, 'environment': -0.5105062038136752} step=5574728
2023-01-10 04:13.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5574728.pt


Epoch 233/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:14.13 [info     ] DQN_20230110005015: epoch=233 step=5598757 epoch=233 metrics={'time_sample_batch': 5.897259592161607e-05, 'time_algorithm_update': 0.0017631812073614988, 'loss': 2.538009922057213e-11, 'time_step': 0.002098055158524067, 'td_error': 7.344239553648183e-13, 'value_scale': 6.760920203323759e-05, 'environment': -0.26206980685325} step=5598757
2023-01-10 04:14.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5598757.pt


Epoch 234/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:15.08 [info     ] DQN_20230110005015: epoch=234 step=5622786 epoch=234 metrics={'time_sample_batch': 6.109814185656444e-05, 'time_algorithm_update': 0.0018450638750696943, 'loss': 2.526235656358932e-11, 'time_step': 0.0021883277312820934, 'td_error': 2.0843236379729885e-13, 'value_scale': 3.6011936803104746e-05, 'environment': -0.05028048416709212} step=5622786
2023-01-10 04:15.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5622786.pt


Epoch 235/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:16.01 [info     ] DQN_20230110005015: epoch=235 step=5646815 epoch=235 metrics={'time_sample_batch': 5.971959251515889e-05, 'time_algorithm_update': 0.0018019653982951245, 'loss': 2.6475926826179442e-11, 'time_step': 0.0021414396602010364, 'td_error': 3.314605094516951e-14, 'value_scale': 1.4334821773013934e-05, 'environment': -0.1713943555482187} step=5646815
2023-01-10 04:16.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5646815.pt


Epoch 236/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:16.55 [info     ] DQN_20230110005015: epoch=236 step=5670844 epoch=236 metrics={'time_sample_batch': 5.9985902168622864e-05, 'time_algorithm_update': 0.001804515938320491, 'loss': 2.5709542846829983e-11, 'time_step': 0.0021419616628414514, 'td_error': 3.98980627817426e-14, 'value_scale': -1.5686652693420553e-05, 'environment': -0.08473129159954107} step=5670844
2023-01-10 04:16.55 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5670844.pt


Epoch 237/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:17.48 [info     ] DQN_20230110005015: epoch=237 step=5694873 epoch=237 metrics={'time_sample_batch': 6.0301068332282694e-05, 'time_algorithm_update': 0.0018091958740626814, 'loss': 2.486117040018231e-11, 'time_step': 0.002149271585009727, 'td_error': 2.893196644546736e-12, 'value_scale': -0.00013421464303080218, 'environment': -0.27697012975114826} step=5694873
2023-01-10 04:17.48 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5694873.pt


Epoch 238/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:18.43 [info     ] DQN_20230110005015: epoch=238 step=5718902 epoch=238 metrics={'time_sample_batch': 6.194700883803998e-05, 'time_algorithm_update': 0.0018426030806037756, 'loss': 2.5957697875503013e-11, 'time_step': 0.002185488933877724, 'td_error': 2.754507012518001e-13, 'value_scale': -4.138873788439141e-05, 'environment': -0.19309313933781413} step=5718902
2023-01-10 04:18.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5718902.pt


Epoch 239/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:19.36 [info     ] DQN_20230110005015: epoch=239 step=5742931 epoch=239 metrics={'time_sample_batch': 5.930947167997707e-05, 'time_algorithm_update': 0.0017858904257086194, 'loss': 2.6135838990385323e-11, 'time_step': 0.002123424844044495, 'td_error': 5.733922614206298e-14, 'value_scale': -1.880830149395438e-05, 'environment': 0.15327869972675118} step=5742931
2023-01-10 04:19.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5742931.pt


Epoch 240/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:20.30 [info     ] DQN_20230110005015: epoch=240 step=5766960 epoch=240 metrics={'time_sample_batch': 5.964913555356248e-05, 'time_algorithm_update': 0.0018230790408086557, 'loss': 2.3742336354375252e-11, 'time_step': 0.002159656907413836, 'td_error': 7.183693092200428e-15, 'value_scale': 6.471106029058791e-06, 'environment': -0.13054166808659207} step=5766960
2023-01-10 04:20.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5766960.pt


Epoch 241/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:21.23 [info     ] DQN_20230110005015: epoch=241 step=5790989 epoch=241 metrics={'time_sample_batch': 5.999910850799547e-05, 'time_algorithm_update': 0.0018102005778331772, 'loss': 2.45152526088559e-11, 'time_step': 0.0021525939765348182, 'td_error': 2.240087673040805e-13, 'value_scale': -3.7312987812853715e-05, 'environment': -0.2646831426678362} step=5790989
2023-01-10 04:21.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5790989.pt


Epoch 242/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:22.18 [info     ] DQN_20230110005015: epoch=242 step=5815018 epoch=242 metrics={'time_sample_batch': 6.068211736103198e-05, 'time_algorithm_update': 0.001823255872799864, 'loss': 2.5698233047560843e-11, 'time_step': 0.0021636976206107672, 'td_error': 6.172708065733069e-13, 'value_scale': -6.197208482840153e-05, 'environment': -0.2017132445090673} step=5815018
2023-01-10 04:22.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5815018.pt


Epoch 243/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:23.11 [info     ] DQN_20230110005015: epoch=243 step=5839047 epoch=243 metrics={'time_sample_batch': 6.099873215395431e-05, 'time_algorithm_update': 0.0018115529030160841, 'loss': 2.5409510953028e-11, 'time_step': 0.002154549566506496, 'td_error': 1.3773337949961683e-14, 'value_scale': -9.112901165024768e-06, 'environment': -0.40164284893142377} step=5839047
2023-01-10 04:23.11 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5839047.pt


Epoch 244/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:24.05 [info     ] DQN_20230110005015: epoch=244 step=5863076 epoch=244 metrics={'time_sample_batch': 6.0736163139065444e-05, 'time_algorithm_update': 0.0018137499477350438, 'loss': 2.5352681973327686e-11, 'time_step': 0.002154329216104703, 'td_error': 9.805569245142428e-16, 'value_scale': 1.888693496659513e-06, 'environment': -0.24441468273692527} step=5863076
2023-01-10 04:24.05 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5863076.pt


Epoch 245/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:24.58 [info     ] DQN_20230110005015: epoch=245 step=5887105 epoch=245 metrics={'time_sample_batch': 6.0388660792073307e-05, 'time_algorithm_update': 0.0017911384424083147, 'loss': 2.509060033517663e-11, 'time_step': 0.0021334977090232228, 'td_error': 2.629914712967824e-14, 'value_scale': 1.2700688056691889e-05, 'environment': 0.2282707088435212} step=5887105
2023-01-10 04:24.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5887105.pt


Epoch 246/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:25.51 [info     ] DQN_20230110005015: epoch=246 step=5911134 epoch=246 metrics={'time_sample_batch': 5.9749279492922086e-05, 'time_algorithm_update': 0.001777892692382081, 'loss': 2.6715579013633003e-11, 'time_step': 0.0021163915703768855, 'td_error': 2.779257216352126e-14, 'value_scale': -1.3101318972429004e-05, 'environment': -0.2858145897975115} step=5911134
2023-01-10 04:25.51 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5911134.pt


Epoch 247/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:26.46 [info     ] DQN_20230110005015: epoch=247 step=5935163 epoch=247 metrics={'time_sample_batch': 6.1027506296839763e-05, 'time_algorithm_update': 0.0018312477123881702, 'loss': 2.4438649271677685e-11, 'time_step': 0.0021741267590890084, 'td_error': 6.831511632569938e-15, 'value_scale': 6.408605853782822e-06, 'environment': -0.2112519920393761} step=5935163
2023-01-10 04:26.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5935163.pt


Epoch 248/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:27.38 [info     ] DQN_20230110005015: epoch=248 step=5959192 epoch=248 metrics={'time_sample_batch': 5.917751742955168e-05, 'time_algorithm_update': 0.0017701810931012427, 'loss': 2.480911181227933e-11, 'time_step': 0.002106691618289953, 'td_error': 4.1520721731136577e-13, 'value_scale': 5.0827976050606386e-05, 'environment': -0.1201153018186261} step=5959192
2023-01-10 04:27.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5959192.pt


Epoch 249/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:28.32 [info     ] DQN_20230110005015: epoch=249 step=5983221 epoch=249 metrics={'time_sample_batch': 6.0548327518732855e-05, 'time_algorithm_update': 0.0018008876082016994, 'loss': 2.561169209595074e-11, 'time_step': 0.0021462516593369527, 'td_error': 1.6780065449648527e-13, 'value_scale': -3.229610054214017e-05, 'environment': -0.03840846001945988} step=5983221
2023-01-10 04:28.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_5983221.pt


Epoch 250/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:29.25 [info     ] DQN_20230110005015: epoch=250 step=6007250 epoch=250 metrics={'time_sample_batch': 6.069793321750074e-05, 'time_algorithm_update': 0.0018125480319424818, 'loss': 2.469355365294827e-11, 'time_step': 0.0021515066115972955, 'td_error': 7.467682011469099e-13, 'value_scale': -6.817722443324776e-05, 'environment': -0.3413282203201294} step=6007250
2023-01-10 04:29.25 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6007250.pt


Epoch 251/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:30.19 [info     ] DQN_20230110005015: epoch=251 step=6031279 epoch=251 metrics={'time_sample_batch': 6.0927858463391974e-05, 'time_algorithm_update': 0.001806999295683279, 'loss': 2.5622129034136095e-11, 'time_step': 0.002150324847626872, 'td_error': 2.8100664000682806e-13, 'value_scale': -4.1811523705155925e-05, 'environment': -0.30358311488270495} step=6031279
2023-01-10 04:30.19 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6031279.pt


Epoch 252/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:31.13 [info     ] DQN_20230110005015: epoch=252 step=6055308 epoch=252 metrics={'time_sample_batch': 6.122582959614819e-05, 'time_algorithm_update': 0.0018121736406550692, 'loss': 2.5741187095093556e-11, 'time_step': 0.0021554537889856124, 'td_error': 5.25027398938881e-13, 'value_scale': 5.7156413450360895e-05, 'environment': -0.05800686910975219} step=6055308
2023-01-10 04:31.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6055308.pt


Epoch 253/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:32.06 [info     ] DQN_20230110005015: epoch=253 step=6079337 epoch=253 metrics={'time_sample_batch': 5.948375368679878e-05, 'time_algorithm_update': 0.001797012058584931, 'loss': 2.6499645367387572e-11, 'time_step': 0.002138669196424961, 'td_error': 4.025237176905706e-13, 'value_scale': 5.004168621540179e-05, 'environment': -0.40798419438036726} step=6079337
2023-01-10 04:32.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6079337.pt


Epoch 254/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:32.59 [info     ] DQN_20230110005015: epoch=254 step=6103366 epoch=254 metrics={'time_sample_batch': 6.081795115968778e-05, 'time_algorithm_update': 0.0017829732336480646, 'loss': 2.542863858068306e-11, 'time_step': 0.0021227787057828246, 'td_error': 6.095612612197744e-13, 'value_scale': 6.15849286105242e-05, 'environment': 0.013328258021019423} step=6103366
2023-01-10 04:32.59 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6103366.pt


Epoch 255/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:33.54 [info     ] DQN_20230110005015: epoch=255 step=6127395 epoch=255 metrics={'time_sample_batch': 6.051913664688148e-05, 'time_algorithm_update': 0.0018280882718108747, 'loss': 2.615915617104695e-11, 'time_step': 0.002173676195699895, 'td_error': 1.3148473521724238e-12, 'value_scale': -9.045257093428875e-05, 'environment': -0.45937914067597047} step=6127395
2023-01-10 04:33.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6127395.pt


Epoch 256/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:34.47 [info     ] DQN_20230110005015: epoch=256 step=6151424 epoch=256 metrics={'time_sample_batch': 5.936398379756764e-05, 'time_algorithm_update': 0.0017814550701809552, 'loss': 2.56777395233077e-11, 'time_step': 0.0021184393070606245, 'td_error': 1.0037057744773012e-12, 'value_scale': -7.90153836861872e-05, 'environment': -0.9309908091958239} step=6151424
2023-01-10 04:34.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6151424.pt


Epoch 257/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:35.40 [info     ] DQN_20230110005015: epoch=257 step=6175453 epoch=257 metrics={'time_sample_batch': 5.933974406271621e-05, 'time_algorithm_update': 0.0017877116205887856, 'loss': 2.5022709266989582e-11, 'time_step': 0.002126731737220901, 'td_error': 2.5502471403514534e-13, 'value_scale': -3.979423054639971e-05, 'environment': -0.08862397022464141} step=6175453
2023-01-10 04:35.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6175453.pt


Epoch 258/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:36.32 [info     ] DQN_20230110005015: epoch=258 step=6199482 epoch=258 metrics={'time_sample_batch': 5.930444116603123e-05, 'time_algorithm_update': 0.0017721674515615714, 'loss': 2.4698509966037116e-11, 'time_step': 0.0021060072104182453, 'td_error': 8.975430543458504e-15, 'value_scale': 7.164211608270331e-06, 'environment': -0.004206603326859304} step=6199482
2023-01-10 04:36.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6199482.pt


Epoch 259/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:37.26 [info     ] DQN_20230110005015: epoch=259 step=6223511 epoch=259 metrics={'time_sample_batch': 6.196612875988146e-05, 'time_algorithm_update': 0.0018153970987392258, 'loss': 2.5724670576638986e-11, 'time_step': 0.002163231221520949, 'td_error': 1.477946087683217e-13, 'value_scale': 3.0290310191518497e-05, 'environment': -0.12763016709612976} step=6223511
2023-01-10 04:37.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6223511.pt


Epoch 260/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:38.21 [info     ] DQN_20230110005015: epoch=260 step=6247540 epoch=260 metrics={'time_sample_batch': 6.089909424262476e-05, 'time_algorithm_update': 0.0018301353933232827, 'loss': 2.510251598495409e-11, 'time_step': 0.0021734764039270877, 'td_error': 5.035848718237777e-13, 'value_scale': 5.5974020298905274e-05, 'environment': 0.07465517933276229} step=6247540
2023-01-10 04:38.21 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6247540.pt


Epoch 261/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:39.13 [info     ] DQN_20230110005015: epoch=261 step=6271569 epoch=261 metrics={'time_sample_batch': 6.029240632306235e-05, 'time_algorithm_update': 0.0017774070245386475, 'loss': 2.689211513134701e-11, 'time_step': 0.0021180929457572293, 'td_error': 5.855564218725111e-13, 'value_scale': 6.0356815238130505e-05, 'environment': -0.12028262122966907} step=6271569
2023-01-10 04:39.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6271569.pt


Epoch 262/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:40.07 [info     ] DQN_20230110005015: epoch=262 step=6295598 epoch=262 metrics={'time_sample_batch': 6.073824678389508e-05, 'time_algorithm_update': 0.0018204868278866965, 'loss': 2.3928421791239254e-11, 'time_step': 0.0021608864166172133, 'td_error': 1.5059140475212194e-13, 'value_scale': -3.0578107179870635e-05, 'environment': -0.5545959458387182} step=6295598
2023-01-10 04:40.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6295598.pt


Epoch 263/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:41.00 [info     ] DQN_20230110005015: epoch=263 step=6319627 epoch=263 metrics={'time_sample_batch': 5.9275379281716925e-05, 'time_algorithm_update': 0.0017664783074072057, 'loss': 2.5445891104978367e-11, 'time_step': 0.002104816893581902, 'td_error': 4.990376574225987e-13, 'value_scale': -5.5714794403527825e-05, 'environment': -0.40870373500407436} step=6319627
2023-01-10 04:41.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6319627.pt


Epoch 264/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:41.54 [info     ] DQN_20230110005015: epoch=264 step=6343656 epoch=264 metrics={'time_sample_batch': 6.005763908347177e-05, 'time_algorithm_update': 0.0018181170787777148, 'loss': 2.4804157212617716e-11, 'time_step': 0.0021608892841093836, 'td_error': 4.0094473633485806e-13, 'value_scale': 4.992535285746663e-05, 'environment': 0.3518768516909955} step=6343656
2023-01-10 04:41.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6343656.pt


Epoch 265/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:42.47 [info     ] DQN_20230110005015: epoch=265 step=6367685 epoch=265 metrics={'time_sample_batch': 5.990745790184618e-05, 'time_algorithm_update': 0.0018051579291367382, 'loss': 2.475694269976873e-11, 'time_step': 0.002148612051888438, 'td_error': 5.0698894993204365e-14, 'value_scale': -1.7681464439047417e-05, 'environment': 0.3784279520168223} step=6367685
2023-01-10 04:42.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6367685.pt


Epoch 266/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:43.40 [info     ] DQN_20230110005015: epoch=266 step=6391714 epoch=266 metrics={'time_sample_batch': 6.0128324253787616e-05, 'time_algorithm_update': 0.001795197551212456, 'loss': 2.5385104796862382e-11, 'time_step': 0.002134060154217569, 'td_error': 5.548809181692469e-15, 'value_scale': 5.678429086973219e-06, 'environment': -0.1060618200445916} step=6391714
2023-01-10 04:43.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6391714.pt


Epoch 267/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:44.34 [info     ] DQN_20230110005015: epoch=267 step=6415743 epoch=267 metrics={'time_sample_batch': 5.957457083501621e-05, 'time_algorithm_update': 0.001782891098353304, 'loss': 2.6038491033279576e-11, 'time_step': 0.0021214881854744122, 'td_error': 7.680900587773984e-13, 'value_scale': 6.914814324827386e-05, 'environment': -0.05727135448985711} step=6415743
2023-01-10 04:44.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6415743.pt


Epoch 268/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:45.28 [info     ] DQN_20230110005015: epoch=268 step=6439772 epoch=268 metrics={'time_sample_batch': 6.180400134789928e-05, 'time_algorithm_update': 0.0018386175343842388, 'loss': 2.552059399698289e-11, 'time_step': 0.0021848811148366823, 'td_error': 3.2634347883099794e-13, 'value_scale': -4.506867653241515e-05, 'environment': 0.01356472046214271} step=6439772
2023-01-10 04:45.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6439772.pt


Epoch 269/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:46.22 [info     ] DQN_20230110005015: epoch=269 step=6463801 epoch=269 metrics={'time_sample_batch': 6.116398503318094e-05, 'time_algorithm_update': 0.0018223693216133268, 'loss': 2.608989086860756e-11, 'time_step': 0.0021692061921353496, 'td_error': 1.4249623752925583e-13, 'value_scale': 2.976535414122405e-05, 'environment': 0.1386627739315462} step=6463801
2023-01-10 04:46.22 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6463801.pt


Epoch 270/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:47.15 [info     ] DQN_20230110005015: epoch=270 step=6487830 epoch=270 metrics={'time_sample_batch': 5.9847052046023204e-05, 'time_algorithm_update': 0.0017823655634387969, 'loss': 2.4868212344602754e-11, 'time_step': 0.0021220407283947585, 'td_error': 9.14596034792582e-14, 'value_scale': -2.3842890494822622e-05, 'environment': -0.14397520062911928} step=6487830
2023-01-10 04:47.15 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6487830.pt


Epoch 271/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:48.08 [info     ] DQN_20230110005015: epoch=271 step=6511859 epoch=271 metrics={'time_sample_batch': 5.889697945853676e-05, 'time_algorithm_update': 0.0017631662150408437, 'loss': 2.4772563879571685e-11, 'time_step': 0.0021016537818208587, 'td_error': 3.8975407779887557e-13, 'value_scale': 4.92532789476226e-05, 'environment': -0.2947299562768012} step=6511859
2023-01-10 04:48.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6511859.pt


Epoch 272/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:49.02 [info     ] DQN_20230110005015: epoch=272 step=6535888 epoch=272 metrics={'time_sample_batch': 6.021393228993096e-05, 'time_algorithm_update': 0.0018219905844381267, 'loss': 2.4319216979540796e-11, 'time_step': 0.0021624752652546386, 'td_error': 1.9276095813017448e-14, 'value_scale': 1.0885878073368901e-05, 'environment': 0.039166979083533796} step=6535888
2023-01-10 04:49.02 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6535888.pt


Epoch 273/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:49.54 [info     ] DQN_20230110005015: epoch=273 step=6559917 epoch=273 metrics={'time_sample_batch': 5.967324630087684e-05, 'time_algorithm_update': 0.0017647766145190782, 'loss': 2.5861087223507406e-11, 'time_step': 0.002101988395336262, 'td_error': 2.7098476237037823e-14, 'value_scale': -1.2937599496829486e-05, 'environment': -0.1318206976083767} step=6559917
2023-01-10 04:49.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6559917.pt


Epoch 274/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:50.47 [info     ] DQN_20230110005015: epoch=274 step=6583946 epoch=274 metrics={'time_sample_batch': 5.9372110012403226e-05, 'time_algorithm_update': 0.0017783316270486211, 'loss': 2.4667755227883296e-11, 'time_step': 0.002116981678436875, 'td_error': 2.940482904457948e-13, 'value_scale': -4.277891765277563e-05, 'environment': 0.4001103705911614} step=6583946
2023-01-10 04:50.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6583946.pt


Epoch 275/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:51.41 [info     ] DQN_20230110005015: epoch=275 step=6607975 epoch=275 metrics={'time_sample_batch': 6.055254441898331e-05, 'time_algorithm_update': 0.0018350206673023747, 'loss': 2.4834105846230302e-11, 'time_step': 0.0021806134331850946, 'td_error': 1.230903071013384e-13, 'value_scale': -2.766828832980865e-05, 'environment': -0.2607530457867756} step=6607975
2023-01-10 04:51.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6607975.pt


Epoch 276/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:52.35 [info     ] DQN_20230110005015: epoch=276 step=6632004 epoch=276 metrics={'time_sample_batch': 5.972073355875607e-05, 'time_algorithm_update': 0.001817076734758514, 'loss': 2.609559759914163e-11, 'time_step': 0.0021565310432846525, 'td_error': 1.2812787160179386e-15, 'value_scale': -2.5992388258555763e-06, 'environment': -0.09250460636067029} step=6632004
2023-01-10 04:52.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6632004.pt


Epoch 277/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:53.29 [info     ] DQN_20230110005015: epoch=277 step=6656033 epoch=277 metrics={'time_sample_batch': 6.080547905706467e-05, 'time_algorithm_update': 0.0017961368384574196, 'loss': 2.47049634452288e-11, 'time_step': 0.0021391363992063566, 'td_error': 2.310782727522532e-14, 'value_scale': -1.1904967110308081e-05, 'environment': -0.3262892222097989} step=6656033
2023-01-10 04:53.29 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6656033.pt


Epoch 278/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:54.22 [info     ] DQN_20230110005015: epoch=278 step=6680062 epoch=278 metrics={'time_sample_batch': 6.013596428482962e-05, 'time_algorithm_update': 0.0018046345671661249, 'loss': 2.594578272580244e-11, 'time_step': 0.0021454081899877978, 'td_error': 6.094880946988838e-13, 'value_scale': 6.159446618590189e-05, 'environment': -0.30891350186017386} step=6680062
2023-01-10 04:54.22 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6680062.pt


Epoch 279/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:55.15 [info     ] DQN_20230110005015: epoch=279 step=6704091 epoch=279 metrics={'time_sample_batch': 5.992415682683798e-05, 'time_algorithm_update': 0.001795733821858895, 'loss': 2.4695683589504095e-11, 'time_step': 0.0021385124468010627, 'td_error': 2.833883920996043e-13, 'value_scale': -4.199477267905848e-05, 'environment': -0.41277198958468686} step=6704091
2023-01-10 04:55.15 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6704091.pt


Epoch 280/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:56.09 [info     ] DQN_20230110005015: epoch=280 step=6728120 epoch=280 metrics={'time_sample_batch': 6.246454652524867e-05, 'time_algorithm_update': 0.0018065258122010397, 'loss': 2.4803740191192205e-11, 'time_step': 0.0021491139225509512, 'td_error': 2.5969491662451334e-14, 'value_scale': 1.2628871954449088e-05, 'environment': -0.132492332131313} step=6728120
2023-01-10 04:56.09 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6728120.pt


Epoch 281/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:57.03 [info     ] DQN_20230110005015: epoch=281 step=6752149 epoch=281 metrics={'time_sample_batch': 6.0626245913243014e-05, 'time_algorithm_update': 0.0018077629713575771, 'loss': 2.3965868747763327e-11, 'time_step': 0.0021524905086858495, 'td_error': 2.302173024244337e-13, 'value_scale': -3.782843167011557e-05, 'environment': -0.32440327594859136} step=6752149
2023-01-10 04:57.03 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6752149.pt


Epoch 282/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:57.57 [info     ] DQN_20230110005015: epoch=282 step=6776178 epoch=282 metrics={'time_sample_batch': 6.0754657967458026e-05, 'time_algorithm_update': 0.0018284162672734141, 'loss': 2.5272404691732193e-11, 'time_step': 0.00217268026308192, 'td_error': 1.161294176716838e-12, 'value_scale': 8.502373439279322e-05, 'environment': -0.0860191719016288} step=6776178
2023-01-10 04:57.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6776178.pt


Epoch 283/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:58.52 [info     ] DQN_20230110005015: epoch=283 step=6800207 epoch=283 metrics={'time_sample_batch': 6.133573689985237e-05, 'time_algorithm_update': 0.0018321003993851668, 'loss': 2.547041029343011e-11, 'time_step': 0.002179621538869242, 'td_error': 2.6266606048296072e-14, 'value_scale': -1.2658922538804548e-05, 'environment': -0.018372629419678255} step=6800207
2023-01-10 04:58.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6800207.pt


Epoch 284/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 04:59.46 [info     ] DQN_20230110005015: epoch=284 step=6824236 epoch=284 metrics={'time_sample_batch': 6.065411714336896e-05, 'time_algorithm_update': 0.0018185782389891045, 'loss': 2.4359657773919176e-11, 'time_step': 0.0021610620678763517, 'td_error': 9.902002061278459e-14, 'value_scale': 2.4784042235978263e-05, 'environment': 0.011057153247020701} step=6824236
2023-01-10 04:59.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6824236.pt


Epoch 285/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:00.39 [info     ] DQN_20230110005015: epoch=285 step=6848265 epoch=285 metrics={'time_sample_batch': 6.066021924608432e-05, 'time_algorithm_update': 0.0018073549440893431, 'loss': 2.5441573438900975e-11, 'time_step': 0.0021517773862039656, 'td_error': 1.070547138426942e-12, 'value_scale': -8.163606443586668e-05, 'environment': -0.4383502589087417} step=6848265
2023-01-10 05:00.39 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6848265.pt


Epoch 286/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:01.33 [info     ] DQN_20230110005015: epoch=286 step=6872294 epoch=286 metrics={'time_sample_batch': 6.107926998767886e-05, 'time_algorithm_update': 0.0018108137647401845, 'loss': 2.4947710010501053e-11, 'time_step': 0.002154981317559433, 'td_error': 3.341009896133063e-13, 'value_scale': 4.558211629634129e-05, 'environment': -0.41636073880398794} step=6872294
2023-01-10 05:01.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6872294.pt


Epoch 287/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:02.27 [info     ] DQN_20230110005015: epoch=287 step=6896323 epoch=287 metrics={'time_sample_batch': 6.062023310959178e-05, 'time_algorithm_update': 0.0018144210103577237, 'loss': 2.4462808591012632e-11, 'time_step': 0.002159225325036909, 'td_error': 3.70468093907493e-14, 'value_scale': -1.5103936170721288e-05, 'environment': -0.41465358878974834} step=6896323
2023-01-10 05:02.27 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6896323.pt


Epoch 288/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:03.22 [info     ] DQN_20230110005015: epoch=288 step=6920352 epoch=288 metrics={'time_sample_batch': 6.13715160582127e-05, 'time_algorithm_update': 0.001841921788277194, 'loss': 2.5878246525598873e-11, 'time_step': 0.0021890562429693616, 'td_error': 4.4434756587738705e-13, 'value_scale': -5.2581368151904494e-05, 'environment': -0.4512137580696062} step=6920352
2023-01-10 05:03.22 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6920352.pt


Epoch 289/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:04.14 [info     ] DQN_20230110005015: epoch=289 step=6944381 epoch=289 metrics={'time_sample_batch': 5.900945659086416e-05, 'time_algorithm_update': 0.001761823047973069, 'loss': 2.4263824540564688e-11, 'time_step': 0.0020999492313626793, 'td_error': 3.9641475091118945e-14, 'value_scale': 1.5656448359869082e-05, 'environment': -0.3969804635672367} step=6944381
2023-01-10 05:04.14 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6944381.pt


Epoch 290/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:05.08 [info     ] DQN_20230110005015: epoch=290 step=6968410 epoch=290 metrics={'time_sample_batch': 6.160207631967107e-05, 'time_algorithm_update': 0.0018154763863860526, 'loss': 2.499204538407904e-11, 'time_step': 0.0021588099653232985, 'td_error': 6.932490986302745e-13, 'value_scale': 6.569015315785744e-05, 'environment': -0.08359228157982829} step=6968410
2023-01-10 05:05.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6968410.pt


Epoch 291/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:06.01 [info     ] DQN_20230110005015: epoch=291 step=6992439 epoch=291 metrics={'time_sample_batch': 5.9861587949239474e-05, 'time_algorithm_update': 0.0017750151788723534, 'loss': 2.4362455305031807e-11, 'time_step': 0.0021177779780359343, 'td_error': 1.8916576188295457e-14, 'value_scale': -1.078763840011398e-05, 'environment': -0.23762734788393175} step=6992439
2023-01-10 05:06.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_6992439.pt


Epoch 292/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:06.55 [info     ] DQN_20230110005015: epoch=292 step=7016468 epoch=292 metrics={'time_sample_batch': 6.0986984365962455e-05, 'time_algorithm_update': 0.0018099538246747573, 'loss': 2.582905498932979e-11, 'time_step': 0.0021527116131686286, 'td_error': 2.380701161520152e-13, 'value_scale': 3.8484919546581714e-05, 'environment': -0.5724706813954338} step=7016468
2023-01-10 05:06.55 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7016468.pt


Epoch 293/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:07.48 [info     ] DQN_20230110005015: epoch=293 step=7040497 epoch=293 metrics={'time_sample_batch': 5.9338225978626043e-05, 'time_algorithm_update': 0.0017872325410318612, 'loss': 2.5506057973122682e-11, 'time_step': 0.002131020414074677, 'td_error': 1.8529239484664936e-12, 'value_scale': 0.0001074077110254416, 'environment': 0.09275157055510759} step=7040497
2023-01-10 05:07.48 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7040497.pt


Epoch 294/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:08.43 [info     ] DQN_20230110005015: epoch=294 step=7064526 epoch=294 metrics={'time_sample_batch': 6.114027117059603e-05, 'time_algorithm_update': 0.0018353883611599779, 'loss': 2.6078311383920022e-11, 'time_step': 0.0021782417194967023, 'td_error': 2.2531556631629116e-13, 'value_scale': 3.7438686217767664e-05, 'environment': -0.468438246197762} step=7064526
2023-01-10 05:08.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7064526.pt


Epoch 295/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:09.35 [info     ] DQN_20230110005015: epoch=295 step=7088555 epoch=295 metrics={'time_sample_batch': 5.9834401345271846e-05, 'time_algorithm_update': 0.0017725865816801119, 'loss': 2.5491677850528867e-11, 'time_step': 0.0021130138531205082, 'td_error': 2.225075127443623e-14, 'value_scale': 1.17093891519841e-05, 'environment': -0.38467891869052184} step=7088555
2023-01-10 05:09.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7088555.pt


Epoch 296/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:10.30 [info     ] DQN_20230110005015: epoch=296 step=7112584 epoch=296 metrics={'time_sample_batch': 6.151481128978226e-05, 'time_algorithm_update': 0.0018308894941534827, 'loss': 2.5202969988277406e-11, 'time_step': 0.002177760139565982, 'td_error': 2.1141220904611732e-13, 'value_scale': -3.625957314886109e-05, 'environment': -0.1540743177952895} step=7112584
2023-01-10 05:10.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7112584.pt


Epoch 297/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:11.23 [info     ] DQN_20230110005015: epoch=297 step=7136613 epoch=297 metrics={'time_sample_batch': 6.000233319642228e-05, 'time_algorithm_update': 0.0018081371741247433, 'loss': 2.510206746620261e-11, 'time_step': 0.0021538360769062376, 'td_error': 4.4598914570673995e-14, 'value_scale': -1.6606751664910344e-05, 'environment': -0.23924344718334584} step=7136613
2023-01-10 05:11.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7136613.pt


Epoch 298/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:12.17 [info     ] DQN_20230110005015: epoch=298 step=7160642 epoch=298 metrics={'time_sample_batch': 6.011140704219462e-05, 'time_algorithm_update': 0.0018178735205413668, 'loss': 2.6317916990497574e-11, 'time_step': 0.0021615088807047716, 'td_error': 3.574806457523697e-14, 'value_scale': 1.4838575163323643e-05, 'environment': -0.23469368008421232} step=7160642
2023-01-10 05:12.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7160642.pt


Epoch 299/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:13.10 [info     ] DQN_20230110005015: epoch=299 step=7184671 epoch=299 metrics={'time_sample_batch': 5.908726584207372e-05, 'time_algorithm_update': 0.0017769662443581154, 'loss': 2.5709350560274364e-11, 'time_step': 0.0021189787031743075, 'td_error': 7.717934284751199e-14, 'value_scale': -2.177620070873725e-05, 'environment': -0.1636085543701502} step=7184671
2023-01-10 05:13.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7184671.pt


Epoch 300/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:14.03 [info     ] DQN_20230110005015: epoch=300 step=7208700 epoch=300 metrics={'time_sample_batch': 5.934185747390055e-05, 'time_algorithm_update': 0.0017699537476060928, 'loss': 2.3952900360602007e-11, 'time_step': 0.002105643455641582, 'td_error': 1.527412917054991e-15, 'value_scale': 2.4688240158926613e-06, 'environment': -0.4612695455829228} step=7208700
2023-01-10 05:14.03 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7208700.pt


Epoch 301/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:14.57 [info     ] DQN_20230110005015: epoch=301 step=7232729 epoch=301 metrics={'time_sample_batch': 6.135157260055762e-05, 'time_algorithm_update': 0.0018299217304291809, 'loss': 2.5196480580547728e-11, 'time_step': 0.002177555614942776, 'td_error': 1.3238949260410844e-13, 'value_scale': -2.86767700999592e-05, 'environment': -0.1843841867306704} step=7232729
2023-01-10 05:14.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7232729.pt


Epoch 302/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:15.50 [info     ] DQN_20230110005015: epoch=302 step=7256758 epoch=302 metrics={'time_sample_batch': 5.990702132864378e-05, 'time_algorithm_update': 0.0018014928177256446, 'loss': 2.539262207432793e-11, 'time_step': 0.002144711895418443, 'td_error': 1.0977465374196783e-13, 'value_scale': -2.610392962096327e-05, 'environment': 0.2003515872442198} step=7256758
2023-01-10 05:15.50 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7256758.pt


Epoch 303/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:16.45 [info     ] DQN_20230110005015: epoch=303 step=7280787 epoch=303 metrics={'time_sample_batch': 6.159167793975936e-05, 'time_algorithm_update': 0.001830669957365385, 'loss': 2.554812641326031e-11, 'time_step': 0.0021789787542835356, 'td_error': 1.0316275070041038e-13, 'value_scale': -2.5313580868840437e-05, 'environment': -0.8305675727116968} step=7280787
2023-01-10 05:16.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7280787.pt


Epoch 304/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:17.39 [info     ] DQN_20230110005015: epoch=304 step=7304816 epoch=304 metrics={'time_sample_batch': 6.116890640382618e-05, 'time_algorithm_update': 0.00182779867494591, 'loss': 2.4511304552020146e-11, 'time_step': 0.002171890819666562, 'td_error': 1.2732795925203275e-13, 'value_scale': -2.8120234207073452e-05, 'environment': -0.11469586627520492} step=7304816
2023-01-10 05:17.39 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7304816.pt


Epoch 305/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:18.33 [info     ] DQN_20230110005015: epoch=305 step=7328845 epoch=305 metrics={'time_sample_batch': 6.0901366407700884e-05, 'time_algorithm_update': 0.0017983303112138142, 'loss': 2.568333349021508e-11, 'time_step': 0.0021466524434588744, 'td_error': 8.739838122774276e-13, 'value_scale': -7.376032958166187e-05, 'environment': -0.10530363401047722} step=7328845
2023-01-10 05:18.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7328845.pt


Epoch 306/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:19.26 [info     ] DQN_20230110005015: epoch=306 step=7352874 epoch=306 metrics={'time_sample_batch': 5.981335653249252e-05, 'time_algorithm_update': 0.0017899860379857523, 'loss': 2.4735777314759088e-11, 'time_step': 0.00213024024783987, 'td_error': 4.4651341087576113e-13, 'value_scale': -5.271590070031798e-05, 'environment': -0.12185565225511268} step=7352874
2023-01-10 05:19.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7352874.pt


Epoch 307/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:20.19 [info     ] DQN_20230110005015: epoch=307 step=7376903 epoch=307 metrics={'time_sample_batch': 6.076014489884273e-05, 'time_algorithm_update': 0.0017765405557194206, 'loss': 2.513817868240391e-11, 'time_step': 0.0021199385788146117, 'td_error': 1.3105992144851608e-12, 'value_scale': -9.033358629717225e-05, 'environment': -0.13373547858649842} step=7376903
2023-01-10 05:20.19 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7376903.pt


Epoch 308/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:21.13 [info     ] DQN_20230110005015: epoch=308 step=7400932 epoch=308 metrics={'time_sample_batch': 6.0889737685127867e-05, 'time_algorithm_update': 0.0018088845576808973, 'loss': 2.618817731775917e-11, 'time_step': 0.002151717069647207, 'td_error': 1.262623918975788e-13, 'value_scale': -2.8020711809692385e-05, 'environment': -0.30655097261157993} step=7400932
2023-01-10 05:21.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7400932.pt


Epoch 309/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:22.06 [info     ] DQN_20230110005015: epoch=309 step=7424961 epoch=309 metrics={'time_sample_batch': 6.12305822907834e-05, 'time_algorithm_update': 0.001799849625646639, 'loss': 2.5370981097935286e-11, 'time_step': 0.002145543527680542, 'td_error': 8.697799897662652e-13, 'value_scale': -7.358958127692892e-05, 'environment': -0.003736612600427702} step=7424961
2023-01-10 05:22.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7424961.pt


Epoch 310/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:23.00 [info     ] DQN_20230110005015: epoch=310 step=7448990 epoch=310 metrics={'time_sample_batch': 6.064395689429492e-05, 'time_algorithm_update': 0.0017921833506019044, 'loss': 2.5386116594640887e-11, 'time_step': 0.002139346688580258, 'td_error': 3.1637362272140283e-13, 'value_scale': -4.437834803047858e-05, 'environment': -0.5139986773621616} step=7448990
2023-01-10 05:23.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7448990.pt


Epoch 311/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:23.54 [info     ] DQN_20230110005015: epoch=311 step=7473019 epoch=311 metrics={'time_sample_batch': 6.0437547068623866e-05, 'time_algorithm_update': 0.0018132087061073683, 'loss': 2.46397382570299e-11, 'time_step': 0.00216241134696896, 'td_error': 1.2229390936443692e-13, 'value_scale': -2.7583924215844792e-05, 'environment': -0.041751544085148834} step=7473019
2023-01-10 05:23.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7473019.pt


Epoch 312/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:24.47 [info     ] DQN_20230110005015: epoch=312 step=7497048 epoch=312 metrics={'time_sample_batch': 6.182731832575474e-05, 'time_algorithm_update': 0.0018125721426897963, 'loss': 2.658648555198878e-11, 'time_step': 0.0021584085362636916, 'td_error': 9.173188781529184e-13, 'value_scale': -7.55747216799485e-05, 'environment': -0.23540965414801535} step=7497048
2023-01-10 05:24.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7497048.pt


Epoch 313/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:25.41 [info     ] DQN_20230110005015: epoch=313 step=7521077 epoch=313 metrics={'time_sample_batch': 6.051970220762095e-05, 'time_algorithm_update': 0.0018083033795273206, 'loss': 2.454968090486102e-11, 'time_step': 0.0021548725711435625, 'td_error': 8.520548919736557e-15, 'value_scale': -7.235396373526328e-06, 'environment': -0.331927055280714} step=7521077
2023-01-10 05:25.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7521077.pt


Epoch 314/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:26.34 [info     ] DQN_20230110005015: epoch=314 step=7545106 epoch=314 metrics={'time_sample_batch': 5.967489337250407e-05, 'time_algorithm_update': 0.0017777232622910768, 'loss': 2.5198811961457797e-11, 'time_step': 0.0021197499097363472, 'td_error': 5.636766157543558e-16, 'value_scale': 1.5402280644270135e-06, 'environment': -0.12707832887635756} step=7545106
2023-01-10 05:26.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7545106.pt


Epoch 315/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:27.28 [info     ] DQN_20230110005015: epoch=315 step=7569135 epoch=315 metrics={'time_sample_batch': 6.061605589781427e-05, 'time_algorithm_update': 0.0018071504789988464, 'loss': 2.6297515368582374e-11, 'time_step': 0.002157165612356459, 'td_error': 7.26270604674254e-13, 'value_scale': 6.723867939962464e-05, 'environment': -0.2568911573078345} step=7569135
2023-01-10 05:27.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7569135.pt


Epoch 316/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:28.23 [info     ] DQN_20230110005015: epoch=316 step=7593164 epoch=316 metrics={'time_sample_batch': 6.110866922401321e-05, 'time_algorithm_update': 0.001839627635787055, 'loss': 2.56298331373242e-11, 'time_step': 0.002188650458099849, 'td_error': 2.72506738306784e-13, 'value_scale': 4.1164807180341875e-05, 'environment': -0.6956288327431135} step=7593164
2023-01-10 05:28.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7593164.pt


Epoch 317/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:29.17 [info     ] DQN_20230110005015: epoch=317 step=7617193 epoch=317 metrics={'time_sample_batch': 6.005171557888466e-05, 'time_algorithm_update': 0.0018104580369571745, 'loss': 2.459512673414102e-11, 'time_step': 0.002157342384814958, 'td_error': 2.1180080759772488e-13, 'value_scale': 3.6288818244210305e-05, 'environment': -0.07146683039968216} step=7617193
2023-01-10 05:29.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7617193.pt


Epoch 318/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:30.10 [info     ] DQN_20230110005015: epoch=318 step=7641222 epoch=318 metrics={'time_sample_batch': 5.901190735406854e-05, 'time_algorithm_update': 0.0017831792069005334, 'loss': 2.5159542743957978e-11, 'time_step': 0.0021259933232596327, 'td_error': 2.131998528483435e-15, 'value_scale': 3.4793994743028673e-06, 'environment': -0.07842190624211305} step=7641222
2023-01-10 05:30.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7641222.pt


Epoch 319/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:31.04 [info     ] DQN_20230110005015: epoch=319 step=7665251 epoch=319 metrics={'time_sample_batch': 6.059652916912511e-05, 'time_algorithm_update': 0.0018191892231858632, 'loss': 2.6721632243472944e-11, 'time_step': 0.0021695290380185244, 'td_error': 1.2753854059368625e-12, 'value_scale': 8.911119671044308e-05, 'environment': -0.48449254190997537} step=7665251
2023-01-10 05:31.04 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7665251.pt


Epoch 320/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:31.57 [info     ] DQN_20230110005015: epoch=320 step=7689280 epoch=320 metrics={'time_sample_batch': 5.959107131764328e-05, 'time_algorithm_update': 0.0017859162926708617, 'loss': 2.606580488674287e-11, 'time_step': 0.002124520484028627, 'td_error': 4.634587466652624e-13, 'value_scale': 5.3709458133637516e-05, 'environment': -0.3921939956903386} step=7689280
2023-01-10 05:31.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7689280.pt


Epoch 321/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:32.52 [info     ] DQN_20230110005015: epoch=321 step=7713309 epoch=321 metrics={'time_sample_batch': 5.9991577620254066e-05, 'time_algorithm_update': 0.001835183290820269, 'loss': 2.5051657578890926e-11, 'time_step': 0.002179375261972497, 'td_error': 1.22843889959473e-13, 'value_scale': 2.7601318798780562e-05, 'environment': -0.2874621526362985} step=7713309
2023-01-10 05:32.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7713309.pt


Epoch 322/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:33.45 [info     ] DQN_20230110005015: epoch=322 step=7737338 epoch=322 metrics={'time_sample_batch': 5.9799495333316317e-05, 'time_algorithm_update': 0.001793630164116776, 'loss': 2.426891648955276e-11, 'time_step': 0.002130955106692445, 'td_error': 3.560170396258346e-15, 'value_scale': 4.491711666477927e-06, 'environment': 0.020875057985303595} step=7737338
2023-01-10 05:33.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7737338.pt


Epoch 323/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:34.39 [info     ] DQN_20230110005015: epoch=323 step=7761367 epoch=323 metrics={'time_sample_batch': 6.080278024090438e-05, 'time_algorithm_update': 0.0018233290285776208, 'loss': 2.5096082983878692e-11, 'time_step': 0.00217255801266313, 'td_error': 5.1373194826862947e-14, 'value_scale': 1.783540860526319e-05, 'environment': -0.019977930021693825} step=7761367
2023-01-10 05:34.39 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7761367.pt


Epoch 324/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:35.32 [info     ] DQN_20230110005015: epoch=324 step=7785396 epoch=324 metrics={'time_sample_batch': 6.0064832619193135e-05, 'time_algorithm_update': 0.0017772118564729383, 'loss': 2.4751611212981257e-11, 'time_step': 0.0021187334184893866, 'td_error': 4.705645458839066e-14, 'value_scale': 1.7058905078692102e-05, 'environment': -0.2695535493872012} step=7785396
2023-01-10 05:35.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7785396.pt


Epoch 325/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:36.25 [info     ] DQN_20230110005015: epoch=325 step=7809425 epoch=325 metrics={'time_sample_batch': 5.955226591322087e-05, 'time_algorithm_update': 0.0017763792121547792, 'loss': 2.6429474347502956e-11, 'time_step': 0.0021195634929789225, 'td_error': 1.3412665562468092e-12, 'value_scale': 9.137807940226905e-05, 'environment': -0.18051286676948752} step=7809425
2023-01-10 05:36.25 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7809425.pt


Epoch 326/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:37.18 [info     ] DQN_20230110005015: epoch=326 step=7833454 epoch=326 metrics={'time_sample_batch': 6.0029152682015174e-05, 'time_algorithm_update': 0.0017798804796274356, 'loss': 2.5366040096790976e-11, 'time_step': 0.0021214023194631947, 'td_error': 1.4786889080346655e-14, 'value_scale': 9.50838811599446e-06, 'environment': 0.19620510096755492} step=7833454
2023-01-10 05:37.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7833454.pt


Epoch 327/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:38.12 [info     ] DQN_20230110005015: epoch=327 step=7857483 epoch=327 metrics={'time_sample_batch': 6.073859405803335e-05, 'time_algorithm_update': 0.0018044998843331846, 'loss': 2.549451347533049e-11, 'time_step': 0.0021507780006888447, 'td_error': 1.0126703875575568e-13, 'value_scale': -2.509056362673464e-05, 'environment': -0.20322900598750512} step=7857483
2023-01-10 05:38.12 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7857483.pt


Epoch 328/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:39.06 [info     ] DQN_20230110005015: epoch=328 step=7881512 epoch=328 metrics={'time_sample_batch': 6.158243052556307e-05, 'time_algorithm_update': 0.001839302130776193, 'loss': 2.4702317977085704e-11, 'time_step': 0.002188085333933579, 'td_error': 3.034830181839406e-13, 'value_scale': -4.346350201703711e-05, 'environment': -0.013555695572416825} step=7881512
2023-01-10 05:39.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7881512.pt


Epoch 329/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:40.00 [info     ] DQN_20230110005015: epoch=329 step=7905541 epoch=329 metrics={'time_sample_batch': 6.010099874016468e-05, 'time_algorithm_update': 0.0018001293202376037, 'loss': 2.4929522712168826e-11, 'time_step': 0.0021438605578295265, 'td_error': 1.2418814563036398e-13, 'value_scale': 2.7789404084842624e-05, 'environment': -0.2309120871975721} step=7905541
2023-01-10 05:40.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7905541.pt


Epoch 330/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:40.54 [info     ] DQN_20230110005015: epoch=330 step=7929570 epoch=330 metrics={'time_sample_batch': 6.108997595325591e-05, 'time_algorithm_update': 0.0018356376146921936, 'loss': 2.4948845521078912e-11, 'time_step': 0.002180252615355429, 'td_error': 6.980233153349674e-13, 'value_scale': -6.59178712935624e-05, 'environment': -0.27446259283733776} step=7929570
2023-01-10 05:40.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7929570.pt


Epoch 331/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:41.47 [info     ] DQN_20230110005015: epoch=331 step=7953599 epoch=331 metrics={'time_sample_batch': 5.934005164838154e-05, 'time_algorithm_update': 0.0017626802197675082, 'loss': 2.3820518709873212e-11, 'time_step': 0.002103949263874841, 'td_error': 3.105150934475346e-14, 'value_scale': -1.3873812100566411e-05, 'environment': 0.007012815677265502} step=7953599
2023-01-10 05:41.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7953599.pt


Epoch 332/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:42.40 [info     ] DQN_20230110005015: epoch=332 step=7977628 epoch=332 metrics={'time_sample_batch': 6.012454392673956e-05, 'time_algorithm_update': 0.0017924222454426813, 'loss': 2.4634926365400444e-11, 'time_step': 0.0021360149305755514, 'td_error': 3.2799313218619194e-14, 'value_scale': -1.4251104151453617e-05, 'environment': -0.29115758233701916} step=7977628
2023-01-10 05:42.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_7977628.pt


Epoch 333/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:43.35 [info     ] DQN_20230110005015: epoch=333 step=8001657 epoch=333 metrics={'time_sample_batch': 6.18794789013233e-05, 'time_algorithm_update': 0.00183181987133627, 'loss': 2.548621850682832e-11, 'time_step': 0.002184999009445567, 'td_error': 2.302592395275653e-13, 'value_scale': -3.7849860193489744e-05, 'environment': 0.04163649798707223} step=8001657
2023-01-10 05:43.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8001657.pt


Epoch 334/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:44.29 [info     ] DQN_20230110005015: epoch=334 step=8025686 epoch=334 metrics={'time_sample_batch': 6.0553953359772875e-05, 'time_algorithm_update': 0.0018254127924726758, 'loss': 2.4925923841059343e-11, 'time_step': 0.002170402888893719, 'td_error': 3.906007721127975e-15, 'value_scale': 4.870010356611604e-06, 'environment': -0.20340372943045448} step=8025686
2023-01-10 05:44.29 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8025686.pt


Epoch 335/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:45.23 [info     ] DQN_20230110005015: epoch=335 step=8049715 epoch=335 metrics={'time_sample_batch': 6.0563667113526275e-05, 'time_algorithm_update': 0.001814708325135494, 'loss': 2.438206466231921e-11, 'time_step': 0.0021654739873607416, 'td_error': 8.041295887511077e-14, 'value_scale': 2.2367763545296852e-05, 'environment': -0.28931745914992346} step=8049715
2023-01-10 05:45.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8049715.pt


Epoch 336/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:46.17 [info     ] DQN_20230110005015: epoch=336 step=8073744 epoch=336 metrics={'time_sample_batch': 6.133814797458381e-05, 'time_algorithm_update': 0.0018137279900873866, 'loss': 2.475199946356992e-11, 'time_step': 0.0021568738127812456, 'td_error': 2.839437079805562e-13, 'value_scale': -4.2041932351161214e-05, 'environment': -0.2111875657348959} step=8073744
2023-01-10 05:46.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8073744.pt


Epoch 337/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:47.09 [info     ] DQN_20230110005015: epoch=337 step=8097773 epoch=337 metrics={'time_sample_batch': 5.995381403824647e-05, 'time_algorithm_update': 0.0017716402993417917, 'loss': 2.514987719678988e-11, 'time_step': 0.002116754501617735, 'td_error': 4.844126673317665e-13, 'value_scale': 5.4916301523461365e-05, 'environment': -0.31251076872925154} step=8097773
2023-01-10 05:47.09 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8097773.pt


Epoch 338/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:48.02 [info     ] DQN_20230110005015: epoch=338 step=8121802 epoch=338 metrics={'time_sample_batch': 6.034101478030229e-05, 'time_algorithm_update': 0.0017866403592491603, 'loss': 2.4941073884470604e-11, 'time_step': 0.0021298009162886004, 'td_error': 3.3080390326782093e-13, 'value_scale': 4.5378642697336714e-05, 'environment': -0.08686314955404155} step=8121802
2023-01-10 05:48.02 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8121802.pt


Epoch 339/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:48.57 [info     ] DQN_20230110005015: epoch=339 step=8145831 epoch=339 metrics={'time_sample_batch': 6.142288286432235e-05, 'time_algorithm_update': 0.00181835187578366, 'loss': 2.5150044573300864e-11, 'time_step': 0.0021665608065817616, 'td_error': 3.2289830828956734e-15, 'value_scale': 4.415576192325562e-06, 'environment': -0.40206451962197287} step=8145831
2023-01-10 05:48.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8145831.pt


Epoch 340/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:49.51 [info     ] DQN_20230110005015: epoch=340 step=8169860 epoch=340 metrics={'time_sample_batch': 6.123716065517411e-05, 'time_algorithm_update': 0.0018200847538894043, 'loss': 2.418059919773026e-11, 'time_step': 0.002165240142878147, 'td_error': 2.3445210819532727e-15, 'value_scale': -3.694297464316009e-06, 'environment': -0.44512330049429877} step=8169860
2023-01-10 05:49.51 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8169860.pt


Epoch 341/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:50.44 [info     ] DQN_20230110005015: epoch=341 step=8193889 epoch=341 metrics={'time_sample_batch': 6.0677563108761486e-05, 'time_algorithm_update': 0.0017978116820935993, 'loss': 2.6241326193428116e-11, 'time_step': 0.0021443492816853767, 'td_error': 4.821947078819774e-13, 'value_scale': 5.478270089445772e-05, 'environment': -0.03770226904390091} step=8193889
2023-01-10 05:50.44 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8193889.pt


Epoch 342/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:51.38 [info     ] DQN_20230110005015: epoch=342 step=8217918 epoch=342 metrics={'time_sample_batch': 6.069644489976528e-05, 'time_algorithm_update': 0.0018214679467821444, 'loss': 2.485240240220001e-11, 'time_step': 0.0021650092849531417, 'td_error': 1.8112190832528613e-15, 'value_scale': 2.970063258641086e-06, 'environment': -0.1778503946711228} step=8217918
2023-01-10 05:51.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8217918.pt


Epoch 343/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:52.32 [info     ] DQN_20230110005015: epoch=343 step=8241947 epoch=343 metrics={'time_sample_batch': 6.023051214950392e-05, 'time_algorithm_update': 0.001788657248067302, 'loss': 2.6340902420500027e-11, 'time_step': 0.0021327924547811005, 'td_error': 6.376315258416711e-13, 'value_scale': -6.299863420633901e-05, 'environment': -0.35883326842126123} step=8241947
2023-01-10 05:52.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8241947.pt


Epoch 344/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:53.26 [info     ] DQN_20230110005015: epoch=344 step=8265976 epoch=344 metrics={'time_sample_batch': 6.0575831630484057e-05, 'time_algorithm_update': 0.0018259046021072976, 'loss': 2.496325649222199e-11, 'time_step': 0.0021722188250512197, 'td_error': 2.461063251469005e-14, 'value_scale': -1.2297117164407654e-05, 'environment': -0.08064686414465765} step=8265976
2023-01-10 05:53.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8265976.pt


Epoch 345/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:54.19 [info     ] DQN_20230110005015: epoch=345 step=8290005 epoch=345 metrics={'time_sample_batch': 6.006121104603686e-05, 'time_algorithm_update': 0.0017946241619216949, 'loss': 2.566699582189559e-11, 'time_step': 0.0021407071002895275, 'td_error': 6.482464137392247e-14, 'value_scale': -2.0043939720345953e-05, 'environment': -0.372973221446836} step=8290005
2023-01-10 05:54.19 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8290005.pt


Epoch 346/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:55.14 [info     ] DQN_20230110005015: epoch=346 step=8314034 epoch=346 metrics={'time_sample_batch': 6.109159325852843e-05, 'time_algorithm_update': 0.001855833997063246, 'loss': 2.575409312089166e-11, 'time_step': 0.0022030577905080137, 'td_error': 3.5567435746117765e-12, 'value_scale': -0.00014881499804404106, 'environment': -0.004884640187447842} step=8314034
2023-01-10 05:55.14 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8314034.pt


Epoch 347/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:56.07 [info     ] DQN_20230110005015: epoch=347 step=8338063 epoch=347 metrics={'time_sample_batch': 5.8873245751715375e-05, 'time_algorithm_update': 0.0017721988848321442, 'loss': 2.4111641341946547e-11, 'time_step': 0.0021136190229559806, 'td_error': 3.173326970412722e-13, 'value_scale': -4.4434534826899715e-05, 'environment': -0.752244660648077} step=8338063
2023-01-10 05:56.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8338063.pt


Epoch 348/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:57.00 [info     ] DQN_20230110005015: epoch=348 step=8362092 epoch=348 metrics={'time_sample_batch': 6.008081715167191e-05, 'time_algorithm_update': 0.001796893935767327, 'loss': 2.5841327365049278e-11, 'time_step': 0.002142858294900117, 'td_error': 3.055014191662747e-13, 'value_scale': -4.359414046238912e-05, 'environment': -0.15017813769366264} step=8362092
2023-01-10 05:57.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8362092.pt


Epoch 349/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:57.53 [info     ] DQN_20230110005015: epoch=349 step=8386121 epoch=349 metrics={'time_sample_batch': 5.978373900955697e-05, 'time_algorithm_update': 0.0017779816243278334, 'loss': 2.4913456922447888e-11, 'time_step': 0.0021225956724677184, 'td_error': 5.84856308328463e-14, 'value_scale': -1.90309233145801e-05, 'environment': -0.3155115179279343} step=8386121
2023-01-10 05:57.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8386121.pt


Epoch 350/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:58.48 [info     ] DQN_20230110005015: epoch=350 step=8410150 epoch=350 metrics={'time_sample_batch': 6.130628805292685e-05, 'time_algorithm_update': 0.0018406750046659674, 'loss': 2.678984080052728e-11, 'time_step': 0.0021878012140778805, 'td_error': 1.6551661752472199e-12, 'value_scale': -0.0001015129948053292, 'environment': -0.5422028963515111} step=8410150
2023-01-10 05:58.48 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8410150.pt


Epoch 351/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 05:59.42 [info     ] DQN_20230110005015: epoch=351 step=8434179 epoch=351 metrics={'time_sample_batch': 6.056740775210138e-05, 'time_algorithm_update': 0.0018102261074433995, 'loss': 2.4969334893571388e-11, 'time_step': 0.002155257529486897, 'td_error': 7.219250763223021e-14, 'value_scale': -2.1173324074533873e-05, 'environment': -0.5134378747868472} step=8434179
2023-01-10 05:59.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8434179.pt


Epoch 352/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:00.36 [info     ] DQN_20230110005015: epoch=352 step=8458208 epoch=352 metrics={'time_sample_batch': 6.050625773741068e-05, 'time_algorithm_update': 0.0018191657772204707, 'loss': 2.4874066521280396e-11, 'time_step': 0.002166594194509627, 'td_error': 7.355111243040428e-14, 'value_scale': -2.133311535969482e-05, 'environment': -0.21809231483554165} step=8458208
2023-01-10 06:00.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8458208.pt


Epoch 353/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:01.30 [info     ] DQN_20230110005015: epoch=353 step=8482237 epoch=353 metrics={'time_sample_batch': 6.145952524696923e-05, 'time_algorithm_update': 0.0018162011772789824, 'loss': 2.6365859674499097e-11, 'time_step': 0.0021670022515442155, 'td_error': 1.2674428893710389e-12, 'value_scale': 8.882758931076517e-05, 'environment': -0.22787645564831244} step=8482237
2023-01-10 06:01.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8482237.pt


Epoch 354/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:02.23 [info     ] DQN_20230110005015: epoch=354 step=8506266 epoch=354 metrics={'time_sample_batch': 6.021740503131369e-05, 'time_algorithm_update': 0.0017839108936656375, 'loss': 2.4332847668635687e-11, 'time_step': 0.00212534331537185, 'td_error': 4.296865815802184e-14, 'value_scale': 1.6313981595476008e-05, 'environment': -0.37147279546133377} step=8506266
2023-01-10 06:02.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8506266.pt


Epoch 355/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:03.16 [info     ] DQN_20230110005015: epoch=355 step=8530295 epoch=355 metrics={'time_sample_batch': 5.978193318403795e-05, 'time_algorithm_update': 0.001779281759168817, 'loss': 2.4904115439505277e-11, 'time_step': 0.0021233701632508944, 'td_error': 1.0202914006617905e-13, 'value_scale': 2.5189051278726296e-05, 'environment': -0.27187569493574465} step=8530295
2023-01-10 06:03.16 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8530295.pt


Epoch 356/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:04.09 [info     ] DQN_20230110005015: epoch=356 step=8554324 epoch=356 metrics={'time_sample_batch': 5.9823566392157736e-05, 'time_algorithm_update': 0.001799414441540792, 'loss': 2.541097800870315e-11, 'time_step': 0.0021414187542979123, 'td_error': 2.0262309491410426e-13, 'value_scale': -3.550489040052042e-05, 'environment': -0.20728018378383095} step=8554324
2023-01-10 06:04.09 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8554324.pt


Epoch 357/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:05.02 [info     ] DQN_20230110005015: epoch=357 step=8578353 epoch=357 metrics={'time_sample_batch': 5.966717396451618e-05, 'time_algorithm_update': 0.0017706969043398784, 'loss': 2.525337475823396e-11, 'time_step': 0.00211299797773133, 'td_error': 1.3864453949468683e-12, 'value_scale': -9.29108614441731e-05, 'environment': -0.0668646487343281} step=8578353
2023-01-10 06:05.02 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8578353.pt


Epoch 358/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:05.57 [info     ] DQN_20230110005015: epoch=358 step=8602382 epoch=358 metrics={'time_sample_batch': 6.339486417532652e-05, 'time_algorithm_update': 0.0018428012153828377, 'loss': 2.5355281423400953e-11, 'time_step': 0.002193800354972305, 'td_error': 3.777122963254197e-14, 'value_scale': 1.529817917879743e-05, 'environment': -0.030331037216406554} step=8602382
2023-01-10 06:05.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8602382.pt


Epoch 359/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:06.51 [info     ] DQN_20230110005015: epoch=359 step=8626411 epoch=359 metrics={'time_sample_batch': 6.052528836018802e-05, 'time_algorithm_update': 0.0018080830291255276, 'loss': 2.5641759315247177e-11, 'time_step': 0.002155197550282158, 'td_error': 4.5343136089190737e-13, 'value_scale': -5.312818894948402e-05, 'environment': -0.34639439288075874} step=8626411
2023-01-10 06:06.51 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8626411.pt


Epoch 360/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:07.45 [info     ] DQN_20230110005015: epoch=360 step=8650440 epoch=360 metrics={'time_sample_batch': 6.170688365460177e-05, 'time_algorithm_update': 0.0018369902772271205, 'loss': 2.5628572075242635e-11, 'time_step': 0.0021847194239979027, 'td_error': 5.803538548040584e-13, 'value_scale': -6.010503985458914e-05, 'environment': -0.05348270397268244} step=8650440
2023-01-10 06:07.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8650440.pt


Epoch 361/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:08.39 [info     ] DQN_20230110005015: epoch=361 step=8674469 epoch=361 metrics={'time_sample_batch': 5.984551411769657e-05, 'time_algorithm_update': 0.001788557203349125, 'loss': 2.6595966819704472e-11, 'time_step': 0.002134412250505305, 'td_error': 4.618213860270217e-14, 'value_scale': 1.691236935166018e-05, 'environment': -0.3919948141740779} step=8674469
2023-01-10 06:08.39 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8674469.pt


Epoch 362/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:09.33 [info     ] DQN_20230110005015: epoch=362 step=8698498 epoch=362 metrics={'time_sample_batch': 6.079059587971013e-05, 'time_algorithm_update': 0.0018285643052775005, 'loss': 2.552406135539717e-11, 'time_step': 0.002176636003258275, 'td_error': 9.56608943052823e-16, 'value_scale': 2.1733081889314136e-06, 'environment': -0.317835441464178} step=8698498
2023-01-10 06:09.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8698498.pt


Epoch 363/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:10.27 [info     ] DQN_20230110005015: epoch=363 step=8722527 epoch=363 metrics={'time_sample_batch': 6.10575008602683e-05, 'time_algorithm_update': 0.0018156810796852688, 'loss': 2.5423121201350156e-11, 'time_step': 0.0021652455305883494, 'td_error': 6.823527714406393e-15, 'value_scale': -6.446008668059405e-06, 'environment': -0.2295578567615662} step=8722527
2023-01-10 06:10.27 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8722527.pt


Epoch 364/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:11.20 [info     ] DQN_20230110005015: epoch=364 step=8746556 epoch=364 metrics={'time_sample_batch': 6.0054821201892645e-05, 'time_algorithm_update': 0.001775801834172487, 'loss': 2.596309105590991e-11, 'time_step': 0.0021168111370686283, 'td_error': 1.323499394898711e-13, 'value_scale': -2.869019514436339e-05, 'environment': -0.0602785158179185} step=8746556
2023-01-10 06:11.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8746556.pt


Epoch 365/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:12.12 [info     ] DQN_20230110005015: epoch=365 step=8770585 epoch=365 metrics={'time_sample_batch': 5.9999713757207885e-05, 'time_algorithm_update': 0.0017756958758718409, 'loss': 2.454601901777595e-11, 'time_step': 0.0021187406814799354, 'td_error': 2.7548954212609084e-13, 'value_scale': 4.139712735958422e-05, 'environment': -0.27778876290664095} step=8770585
2023-01-10 06:12.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8770585.pt


Epoch 366/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:13.06 [info     ] DQN_20230110005015: epoch=366 step=8794614 epoch=366 metrics={'time_sample_batch': 6.02342726323155e-05, 'time_algorithm_update': 0.0018043878834625325, 'loss': 2.575020910510354e-11, 'time_step': 0.0021515057781393634, 'td_error': 1.727110405640673e-13, 'value_scale': 3.277588846194947e-05, 'environment': -0.12267603251313022} step=8794614
2023-01-10 06:13.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8794614.pt


Epoch 367/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:14.00 [info     ] DQN_20230110005015: epoch=367 step=8818643 epoch=367 metrics={'time_sample_batch': 6.0939060534880826e-05, 'time_algorithm_update': 0.001824191270574479, 'loss': 2.5527336386244014e-11, 'time_step': 0.0021742870012985255, 'td_error': 6.012473918878788e-13, 'value_scale': 6.117116600050483e-05, 'environment': -0.3248442820739615} step=8818643
2023-01-10 06:14.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8818643.pt


Epoch 368/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:14.55 [info     ] DQN_20230110005015: epoch=368 step=8842672 epoch=368 metrics={'time_sample_batch': 6.21791963268891e-05, 'time_algorithm_update': 0.0018435759244526146, 'loss': 2.5535679036021738e-11, 'time_step': 0.0021956091273604393, 'td_error': 2.7214948969667385e-13, 'value_scale': -4.1142671722412285e-05, 'environment': 0.0006420346906045871} step=8842672
2023-01-10 06:14.55 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8842672.pt


Epoch 369/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:15.48 [info     ] DQN_20230110005015: epoch=369 step=8866701 epoch=369 metrics={'time_sample_batch': 5.901816821067569e-05, 'time_algorithm_update': 0.001781425422891665, 'loss': 2.6316148758490286e-11, 'time_step': 0.00212683345877706, 'td_error': 1.8502141236632725e-14, 'value_scale': -1.0672907055802491e-05, 'environment': -0.19526371259892528} step=8866701
2023-01-10 06:15.48 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8866701.pt


Epoch 370/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:16.42 [info     ] DQN_20230110005015: epoch=370 step=8890730 epoch=370 metrics={'time_sample_batch': 5.990803338470389e-05, 'time_algorithm_update': 0.0018114905325808503, 'loss': 2.6428012001551573e-11, 'time_step': 0.0021581896245690428, 'td_error': 4.759799724393686e-13, 'value_scale': -5.443221924420977e-05, 'environment': -0.2038723875910396} step=8890730
2023-01-10 06:16.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8890730.pt


Epoch 371/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:17.36 [info     ] DQN_20230110005015: epoch=371 step=8914759 epoch=371 metrics={'time_sample_batch': 6.123404511004789e-05, 'time_algorithm_update': 0.0018282190254849933, 'loss': 2.509455309573066e-11, 'time_step': 0.0021765833465767948, 'td_error': 6.929419246484444e-13, 'value_scale': -6.566446953530498e-05, 'environment': -0.186242660845005} step=8914759
2023-01-10 06:17.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8914759.pt


Epoch 372/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:18.30 [info     ] DQN_20230110005015: epoch=372 step=8938788 epoch=372 metrics={'time_sample_batch': 6.047657075964748e-05, 'time_algorithm_update': 0.0017910229588741617, 'loss': 2.5050919188982776e-11, 'time_step': 0.0021368395975106484, 'td_error': 1.4436940541644429e-13, 'value_scale': 2.9934824413214794e-05, 'environment': -0.11785052390642632} step=8938788
2023-01-10 06:18.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8938788.pt


Epoch 373/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:19.23 [info     ] DQN_20230110005015: epoch=373 step=8962817 epoch=373 metrics={'time_sample_batch': 6.199518072207753e-05, 'time_algorithm_update': 0.001786650251601042, 'loss': 2.4940732601156004e-11, 'time_step': 0.002136121166695508, 'td_error': 3.1747946591317114e-13, 'value_scale': -4.442363197541158e-05, 'environment': -0.16713891927399682} step=8962817
2023-01-10 06:19.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8962817.pt


Epoch 374/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:20.18 [info     ] DQN_20230110005015: epoch=374 step=8986846 epoch=374 metrics={'time_sample_batch': 6.0553794605881095e-05, 'time_algorithm_update': 0.0018300090153033062, 'loss': 2.523709922694781e-11, 'time_step': 0.0021790298035818615, 'td_error': 3.348553759218521e-15, 'value_scale': -3.904969788101792e-06, 'environment': -0.13886291911737048} step=8986846
2023-01-10 06:20.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_8986846.pt


Epoch 375/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:21.12 [info     ] DQN_20230110005015: epoch=375 step=9010875 epoch=375 metrics={'time_sample_batch': 6.233392183866695e-05, 'time_algorithm_update': 0.001828751882922759, 'loss': 2.6055576900229818e-11, 'time_step': 0.002182593530788816, 'td_error': 1.244839162122766e-12, 'value_scale': -8.80197914582682e-05, 'environment': -0.17943396520681648} step=9010875
2023-01-10 06:21.12 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9010875.pt


Epoch 376/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:22.07 [info     ] DQN_20230110005015: epoch=376 step=9034904 epoch=376 metrics={'time_sample_batch': 6.26236080026958e-05, 'time_algorithm_update': 0.0018580598456914457, 'loss': 2.6069768702427768e-11, 'time_step': 0.002223431858172443, 'td_error': 6.95654574682788e-14, 'value_scale': 2.0728703697072888e-05, 'environment': -0.22978545564736202} step=9034904
2023-01-10 06:22.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9034904.pt


Epoch 377/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:23.00 [info     ] DQN_20230110005015: epoch=377 step=9058933 epoch=377 metrics={'time_sample_batch': 6.04676805417077e-05, 'time_algorithm_update': 0.0017705376146537116, 'loss': 2.4907741489468643e-11, 'time_step': 0.002118412080768184, 'td_error': 2.4414822978054486e-13, 'value_scale': 3.894697373268099e-05, 'environment': -0.215665657972741} step=9058933
2023-01-10 06:23.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9058933.pt


Epoch 378/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:23.54 [info     ] DQN_20230110005015: epoch=378 step=9082962 epoch=378 metrics={'time_sample_batch': 6.047660052600219e-05, 'time_algorithm_update': 0.0017910525962413337, 'loss': 2.59605410782733e-11, 'time_step': 0.002137351052939378, 'td_error': 5.108870581258387e-13, 'value_scale': 5.6368193833215564e-05, 'environment': -0.5488944507776582} step=9082962
2023-01-10 06:23.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9082962.pt


Epoch 379/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:24.47 [info     ] DQN_20230110005015: epoch=379 step=9106991 epoch=379 metrics={'time_sample_batch': 6.0295561556661514e-05, 'time_algorithm_update': 0.0018027640295919692, 'loss': 2.4907674988958612e-11, 'time_step': 0.002153287403612003, 'td_error': 2.459792597866214e-13, 'value_scale': 3.909765141943585e-05, 'environment': 0.007025464992773856} step=9106991
2023-01-10 06:24.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9106991.pt


Epoch 380/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:25.41 [info     ] DQN_20230110005015: epoch=380 step=9131020 epoch=380 metrics={'time_sample_batch': 6.086917905614212e-05, 'time_algorithm_update': 0.001788247534038968, 'loss': 2.6061469231095525e-11, 'time_step': 0.002136657586173721, 'td_error': 3.0465033749347083e-14, 'value_scale': -1.3634808049417488e-05, 'environment': -0.31351460867401687} step=9131020
2023-01-10 06:25.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9131020.pt


Epoch 381/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:26.35 [info     ] DQN_20230110005015: epoch=381 step=9155049 epoch=381 metrics={'time_sample_batch': 6.0506982052041934e-05, 'time_algorithm_update': 0.0018171666192276171, 'loss': 2.5485136802014144e-11, 'time_step': 0.0021684848739838025, 'td_error': 5.471389284495055e-16, 'value_scale': 1.0002987330043143e-06, 'environment': -0.4346223382532897} step=9155049
2023-01-10 06:26.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9155049.pt


Epoch 382/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:27.29 [info     ] DQN_20230110005015: epoch=382 step=9179078 epoch=382 metrics={'time_sample_batch': 6.10123453001746e-05, 'time_algorithm_update': 0.0018097829459544905, 'loss': 2.5433286992567382e-11, 'time_step': 0.002160023837268335, 'td_error': 6.251368573033698e-14, 'value_scale': -1.9680757275367506e-05, 'environment': -0.18110708311316095} step=9179078
2023-01-10 06:27.29 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9179078.pt


Epoch 383/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:28.23 [info     ] DQN_20230110005015: epoch=383 step=9203107 epoch=383 metrics={'time_sample_batch': 6.089200985020399e-05, 'time_algorithm_update': 0.0018276350195277193, 'loss': 2.539727056075408e-11, 'time_step': 0.0021762364990896062, 'td_error': 4.137972470243315e-14, 'value_scale': 1.600909016991016e-05, 'environment': 0.20208539444144707} step=9203107
2023-01-10 06:28.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9203107.pt


Epoch 384/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:29.17 [info     ] DQN_20230110005015: epoch=384 step=9227136 epoch=384 metrics={'time_sample_batch': 6.081787178274189e-05, 'time_algorithm_update': 0.0018169033655865698, 'loss': 2.6019589025386215e-11, 'time_step': 0.002165432314464149, 'td_error': 1.3726090138283503e-12, 'value_scale': -9.244533776255404e-05, 'environment': -0.3447322548173335} step=9227136
2023-01-10 06:29.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9227136.pt


Epoch 385/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:30.10 [info     ] DQN_20230110005015: epoch=385 step=9251165 epoch=385 metrics={'time_sample_batch': 5.9215519142396946e-05, 'time_algorithm_update': 0.0017712160692544777, 'loss': 2.5442082237738524e-11, 'time_step': 0.002115512272258779, 'td_error': 2.945028901654321e-16, 'value_scale': 2.2417504420144395e-07, 'environment': -0.1864666295108784} step=9251165
2023-01-10 06:30.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9251165.pt


Epoch 386/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:31.04 [info     ] DQN_20230110005015: epoch=386 step=9275194 epoch=386 metrics={'time_sample_batch': 6.132427685328938e-05, 'time_algorithm_update': 0.0018170368379210853, 'loss': 2.6117349564722296e-11, 'time_step': 0.00216554443440022, 'td_error': 2.7713040605971606e-14, 'value_scale': -1.3088104180681359e-05, 'environment': -1.0579916216794039} step=9275194
2023-01-10 06:31.04 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9275194.pt


Epoch 387/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:31.59 [info     ] DQN_20230110005015: epoch=387 step=9299223 epoch=387 metrics={'time_sample_batch': 6.100228427228293e-05, 'time_algorithm_update': 0.0018390633450787166, 'loss': 2.5184546326599425e-11, 'time_step': 0.002192493105972546, 'td_error': 6.324664396377838e-13, 'value_scale': -6.274225311153812e-05, 'environment': -0.15454235239976336} step=9299223
2023-01-10 06:31.59 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9299223.pt


Epoch 388/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:32.53 [info     ] DQN_20230110005015: epoch=388 step=9323252 epoch=388 metrics={'time_sample_batch': 6.068739592793372e-05, 'time_algorithm_update': 0.001826730201721509, 'loss': 2.7887501810249087e-11, 'time_step': 0.0021791970011962626, 'td_error': 1.8070508716245772e-12, 'value_scale': -0.00010607310103247752, 'environment': -0.487335914674156} step=9323252
2023-01-10 06:32.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9323252.pt


Epoch 389/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:33.46 [info     ] DQN_20230110005015: epoch=389 step=9347281 epoch=389 metrics={'time_sample_batch': 5.971737988279218e-05, 'time_algorithm_update': 0.001800403279844228, 'loss': 2.5056742821327697e-11, 'time_step': 0.0021468976289226132, 'td_error': 7.395738570121445e-14, 'value_scale': 2.1435146502690324e-05, 'environment': -0.33343776074032805} step=9347281
2023-01-10 06:33.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9347281.pt


Epoch 390/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:34.41 [info     ] DQN_20230110005015: epoch=390 step=9371310 epoch=390 metrics={'time_sample_batch': 6.091623966293719e-05, 'time_algorithm_update': 0.001823006430747402, 'loss': 2.5201257415302547e-11, 'time_step': 0.0021720893414082353, 'td_error': 2.0538274419148263e-13, 'value_scale': -3.573981097783126e-05, 'environment': -0.10929979060179765} step=9371310
2023-01-10 06:34.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9371310.pt


Epoch 391/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:35.33 [info     ] DQN_20230110005015: epoch=391 step=9395339 epoch=391 metrics={'time_sample_batch': 5.9523432237625994e-05, 'time_algorithm_update': 0.001771520132567831, 'loss': 2.462785260485977e-11, 'time_step': 0.002114980982515695, 'td_error': 1.8140415513237604e-13, 'value_scale': -3.358291253443951e-05, 'environment': -0.25097888941046687} step=9395339
2023-01-10 06:35.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9395339.pt


Epoch 392/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:36.27 [info     ] DQN_20230110005015: epoch=392 step=9419368 epoch=392 metrics={'time_sample_batch': 6.0898260784692905e-05, 'time_algorithm_update': 0.0018145697429100868, 'loss': 2.5791206075989406e-11, 'time_step': 0.0021649853131154827, 'td_error': 7.991255619273567e-14, 'value_scale': 2.2290355978161653e-05, 'environment': -0.06513824372587051} step=9419368
2023-01-10 06:36.27 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9419368.pt


Epoch 393/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:37.24 [info     ] DQN_20230110005015: epoch=393 step=9443397 epoch=393 metrics={'time_sample_batch': 6.425576668622277e-05, 'time_algorithm_update': 0.0018671073986601008, 'loss': 2.5345682842085487e-11, 'time_step': 0.0022497517353457764, 'td_error': 2.767504572768901e-13, 'value_scale': -4.1490484487870894e-05, 'environment': -0.0689345165787579} step=9443397
2023-01-10 06:37.24 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9443397.pt


Epoch 394/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:38.19 [info     ] DQN_20230110005015: epoch=394 step=9467426 epoch=394 metrics={'time_sample_batch': 6.293826813832558e-05, 'time_algorithm_update': 0.0018631967545111303, 'loss': 2.5066602473781845e-11, 'time_step': 0.0022163584495365676, 'td_error': 1.7554073613678928e-13, 'value_scale': -3.302566643417469e-05, 'environment': -0.13977768402935614} step=9467426
2023-01-10 06:38.19 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9467426.pt


Epoch 395/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:39.13 [info     ] DQN_20230110005015: epoch=395 step=9491455 epoch=395 metrics={'time_sample_batch': 6.061727631835734e-05, 'time_algorithm_update': 0.00181618881431966, 'loss': 2.5759760980721497e-11, 'time_step': 0.002167068055032359, 'td_error': 1.1531122369467412e-12, 'value_scale': -8.47318300335557e-05, 'environment': 0.01265334164227382} step=9491455
2023-01-10 06:39.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9491455.pt


Epoch 396/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:40.06 [info     ] DQN_20230110005015: epoch=396 step=9515484 epoch=396 metrics={'time_sample_batch': 6.004828252597488e-05, 'time_algorithm_update': 0.0017997231980160712, 'loss': 2.5295173090090355e-11, 'time_step': 0.0021465313936163905, 'td_error': 6.655292978337407e-14, 'value_scale': 2.0335074149238583e-05, 'environment': -0.1048612781212686} step=9515484
2023-01-10 06:40.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9515484.pt


Epoch 397/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:41.01 [info     ] DQN_20230110005015: epoch=397 step=9539513 epoch=397 metrics={'time_sample_batch': 6.150541504381242e-05, 'time_algorithm_update': 0.0018200601768025328, 'loss': 2.5009493793627004e-11, 'time_step': 0.002174556605095244, 'td_error': 1.6393123366138832e-13, 'value_scale': 3.194112720644069e-05, 'environment': 0.1746672224681502} step=9539513
2023-01-10 06:41.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9539513.pt


Epoch 398/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:41.54 [info     ] DQN_20230110005015: epoch=398 step=9563542 epoch=398 metrics={'time_sample_batch': 5.9989156623404395e-05, 'time_algorithm_update': 0.0017950019862620174, 'loss': 2.5964599025441076e-11, 'time_step': 0.0021430211069382575, 'td_error': 2.1783367196060217e-14, 'value_scale': -1.1598542102021421e-05, 'environment': -0.2851023768614467} step=9563542
2023-01-10 06:41.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9563542.pt


Epoch 399/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:42.48 [info     ] DQN_20230110005015: epoch=399 step=9587571 epoch=399 metrics={'time_sample_batch': 6.0754221394255625e-05, 'time_algorithm_update': 0.0018208086517916928, 'loss': 2.4981987798158522e-11, 'time_step': 0.0021730704999921562, 'td_error': 4.216923910085456e-13, 'value_scale': -5.123271549116544e-05, 'environment': -0.155315651911697} step=9587571
2023-01-10 06:42.48 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9587571.pt


Epoch 400/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:43.41 [info     ] DQN_20230110005015: epoch=400 step=9611600 epoch=400 metrics={'time_sample_batch': 5.9944060596020125e-05, 'time_algorithm_update': 0.001782985715672806, 'loss': 2.5373807243998075e-11, 'time_step': 0.002128048928183133, 'td_error': 1.0905947403637037e-12, 'value_scale': -8.240235853307587e-05, 'environment': 0.001838617502414494} step=9611600
2023-01-10 06:43.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9611600.pt


Epoch 401/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:44.35 [info     ] DQN_20230110005015: epoch=401 step=9635629 epoch=401 metrics={'time_sample_batch': 6.035145284868695e-05, 'time_algorithm_update': 0.001798507292036796, 'loss': 2.4862423351071296e-11, 'time_step': 0.0021464383241473336, 'td_error': 1.8910165869746016e-12, 'value_scale': -0.0001085075000760174, 'environment': -0.02404835401029851} step=9635629
2023-01-10 06:44.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9635629.pt


Epoch 402/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:45.29 [info     ] DQN_20230110005015: epoch=402 step=9659658 epoch=402 metrics={'time_sample_batch': 6.0875638355114e-05, 'time_algorithm_update': 0.0018043602205968894, 'loss': 2.489119290664072e-11, 'time_step': 0.0021531738152024333, 'td_error': 2.1407248953544671e-13, 'value_scale': -3.649868309110969e-05, 'environment': -0.0890196497588767} step=9659658
2023-01-10 06:45.29 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9659658.pt


Epoch 403/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:46.23 [info     ] DQN_20230110005015: epoch=403 step=9683687 epoch=403 metrics={'time_sample_batch': 6.095304079947586e-05, 'time_algorithm_update': 0.0018395130948541346, 'loss': 2.5178623022732736e-11, 'time_step': 0.0021892323804122934, 'td_error': 2.4611478662675873e-13, 'value_scale': 3.9136319006489694e-05, 'environment': -0.1969039119069514} step=9683687
2023-01-10 06:46.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9683687.pt


Epoch 404/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:47.17 [info     ] DQN_20230110005015: epoch=404 step=9707716 epoch=404 metrics={'time_sample_batch': 5.99804549257111e-05, 'time_algorithm_update': 0.0017882353893662467, 'loss': 2.6870677909705792e-11, 'time_step': 0.00213540047363741, 'td_error': 1.02261932649438e-12, 'value_scale': -7.979354404611132e-05, 'environment': -0.1861252451375637} step=9707716
2023-01-10 06:47.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9707716.pt


Epoch 405/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:48.10 [info     ] DQN_20230110005015: epoch=405 step=9731745 epoch=405 metrics={'time_sample_batch': 6.062220761112082e-05, 'time_algorithm_update': 0.0017915768412804702, 'loss': 2.5159087127310767e-11, 'time_step': 0.002144627140684468, 'td_error': 4.196282016865299e-15, 'value_scale': -5.020989445424329e-06, 'environment': -0.245096800132316} step=9731745
2023-01-10 06:48.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9731745.pt


Epoch 406/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:49.04 [info     ] DQN_20230110005015: epoch=406 step=9755774 epoch=406 metrics={'time_sample_batch': 6.069238675340661e-05, 'time_algorithm_update': 0.0017984582866948267, 'loss': 2.5105357168159795e-11, 'time_step': 0.002148981650792742, 'td_error': 1.4637647039375253e-14, 'value_scale': 9.513266434895317e-06, 'environment': -0.2763523679249601} step=9755774
2023-01-10 06:49.04 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9755774.pt


Epoch 407/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:49.58 [info     ] DQN_20230110005015: epoch=407 step=9779803 epoch=407 metrics={'time_sample_batch': 6.090807375962867e-05, 'time_algorithm_update': 0.0018263176698115956, 'loss': 2.4891726220429638e-11, 'time_step': 0.0021780955071623712, 'td_error': 2.0603814170340294e-14, 'value_scale': -1.1302048910817522e-05, 'environment': -0.10141098891726301} step=9779803
2023-01-10 06:49.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9779803.pt


Epoch 408/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:50.52 [info     ] DQN_20230110005015: epoch=408 step=9803832 epoch=408 metrics={'time_sample_batch': 6.104626902242472e-05, 'time_algorithm_update': 0.0018220929211656164, 'loss': 2.4672337260908587e-11, 'time_step': 0.0021719857445717293, 'td_error': 5.852199188630863e-13, 'value_scale': 6.036342031313916e-05, 'environment': -0.4364211053392462} step=9803832
2023-01-10 06:50.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9803832.pt


Epoch 409/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:51.46 [info     ] DQN_20230110005015: epoch=409 step=9827861 epoch=409 metrics={'time_sample_batch': 6.0204982539281755e-05, 'time_algorithm_update': 0.0018147774822996016, 'loss': 2.6915083357954737e-11, 'time_step': 0.0021643065806954058, 'td_error': 5.013584977199439e-13, 'value_scale': 5.586621766507367e-05, 'environment': -0.30404883704719693} step=9827861
2023-01-10 06:51.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9827861.pt


Epoch 410/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:52.40 [info     ] DQN_20230110005015: epoch=410 step=9851890 epoch=410 metrics={'time_sample_batch': 5.938123836118068e-05, 'time_algorithm_update': 0.001800543489297026, 'loss': 2.5675005352022466e-11, 'time_step': 0.0021476576631795185, 'td_error': 2.2875831847486644e-15, 'value_scale': 3.69838833266884e-06, 'environment': -0.42970913843699765} step=9851890
2023-01-10 06:52.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9851890.pt


Epoch 411/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:53.33 [info     ] DQN_20230110005015: epoch=411 step=9875919 epoch=411 metrics={'time_sample_batch': 6.004876870976846e-05, 'time_algorithm_update': 0.0017841904592690652, 'loss': 2.5758600858604942e-11, 'time_step': 0.002132318048541865, 'td_error': 2.4635813004921153e-12, 'value_scale': -0.00012385476580467744, 'environment': 0.08496377932347711} step=9875919
2023-01-10 06:53.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9875919.pt


Epoch 412/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:54.27 [info     ] DQN_20230110005015: epoch=412 step=9899948 epoch=412 metrics={'time_sample_batch': 6.068541150428645e-05, 'time_algorithm_update': 0.001817720144437669, 'loss': 2.5042687004388673e-11, 'time_step': 0.0021670021126345604, 'td_error': 2.0335175966554985e-14, 'value_scale': -1.1198731232177436e-05, 'environment': -0.005646656578790377} step=9899948
2023-01-10 06:54.27 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9899948.pt


Epoch 413/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:55.21 [info     ] DQN_20230110005015: epoch=413 step=9923977 epoch=413 metrics={'time_sample_batch': 6.151166597830133e-05, 'time_algorithm_update': 0.001816657505418791, 'loss': 2.5240035841288755e-11, 'time_step': 0.0021728292932978303, 'td_error': 1.7405146908673174e-13, 'value_scale': -3.2900749122132856e-05, 'environment': -0.3011003140718603} step=9923977
2023-01-10 06:55.21 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9923977.pt


Epoch 414/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:56.15 [info     ] DQN_20230110005015: epoch=414 step=9948006 epoch=414 metrics={'time_sample_batch': 6.072246069378103e-05, 'time_algorithm_update': 0.0017937860604385059, 'loss': 2.4918096841706705e-11, 'time_step': 0.0021445446382713325, 'td_error': 3.552600697269933e-14, 'value_scale': 1.482981233814275e-05, 'environment': -0.26686886761334333} step=9948006
2023-01-10 06:56.15 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9948006.pt


Epoch 415/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:57.09 [info     ] DQN_20230110005015: epoch=415 step=9972035 epoch=415 metrics={'time_sample_batch': 6.143097931280323e-05, 'time_algorithm_update': 0.0018250369029454096, 'loss': 2.5676489738314384e-11, 'time_step': 0.002178586403962115, 'td_error': 2.05208776844078e-14, 'value_scale': -1.1247142359963676e-05, 'environment': -0.3117332690232454} step=9972035
2023-01-10 06:57.09 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9972035.pt


Epoch 416/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:58.02 [info     ] DQN_20230110005015: epoch=416 step=9996064 epoch=416 metrics={'time_sample_batch': 6.0094450142128675e-05, 'time_algorithm_update': 0.0017835437852130101, 'loss': 2.6525637027814358e-11, 'time_step': 0.002133292479929622, 'td_error': 1.573782855003912e-13, 'value_scale': 3.1287273527315856e-05, 'environment': 0.1512603925619484} step=9996064
2023-01-10 06:58.02 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_9996064.pt


Epoch 417/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:58.56 [info     ] DQN_20230110005015: epoch=417 step=10020093 epoch=417 metrics={'time_sample_batch': 6.157448290885574e-05, 'time_algorithm_update': 0.0018101188691895009, 'loss': 2.6078199469687566e-11, 'time_step': 0.002165188746305683, 'td_error': 3.028481451039524e-14, 'value_scale': -1.3667454350621267e-05, 'environment': -0.19097335714634436} step=10020093
2023-01-10 06:58.56 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10020093.pt


Epoch 418/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 06:59.50 [info     ] DQN_20230110005015: epoch=418 step=10044122 epoch=418 metrics={'time_sample_batch': 6.054523181784311e-05, 'time_algorithm_update': 0.0018202376438093083, 'loss': 2.4934881730778636e-11, 'time_step': 0.0021691541307809633, 'td_error': 2.865787336750073e-13, 'value_scale': 4.223239087202418e-05, 'environment': -0.31501949285016134} step=10044122
2023-01-10 06:59.50 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10044122.pt


Epoch 419/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:00.44 [info     ] DQN_20230110005015: epoch=419 step=10068151 epoch=419 metrics={'time_sample_batch': 6.003684232364835e-05, 'time_algorithm_update': 0.0017835871250254666, 'loss': 2.6126081077214412e-11, 'time_step': 0.0021369898382249835, 'td_error': 6.306450283302044e-13, 'value_scale': -6.265590529876997e-05, 'environment': -0.18893618218738342} step=10068151
2023-01-10 07:00.44 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10068151.pt


Epoch 420/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:01.38 [info     ] DQN_20230110005015: epoch=420 step=10092180 epoch=420 metrics={'time_sample_batch': 6.014184810094378e-05, 'time_algorithm_update': 0.0018068089696112691, 'loss': 2.468187278768568e-11, 'time_step': 0.0021606628117606386, 'td_error': 8.627529877155697e-14, 'value_scale': 2.3155448715323613e-05, 'environment': -0.6785125608756891} step=10092180
2023-01-10 07:01.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10092180.pt


Epoch 421/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:02.32 [info     ] DQN_20230110005015: epoch=421 step=10116209 epoch=421 metrics={'time_sample_batch': 6.241342777209493e-05, 'time_algorithm_update': 0.0018335521342706838, 'loss': 2.5764509364266985e-11, 'time_step': 0.0021998293316762662, 'td_error': 1.0321727990410248e-15, 'value_scale': -2.323965028991244e-06, 'environment': 0.2679042942518752} step=10116209
2023-01-10 07:02.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10116209.pt


Epoch 422/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:03.26 [info     ] DQN_20230110005015: epoch=422 step=10140238 epoch=422 metrics={'time_sample_batch': 6.102812146817042e-05, 'time_algorithm_update': 0.0018067949397360828, 'loss': 2.5582356404234076e-11, 'time_step': 0.0021610215062570015, 'td_error': 3.784616456605237e-14, 'value_scale': 1.531338774597264e-05, 'environment': -0.254084253367499} step=10140238
2023-01-10 07:03.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10140238.pt


Epoch 423/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:04.20 [info     ] DQN_20230110005015: epoch=423 step=10164267 epoch=423 metrics={'time_sample_batch': 6.008672081202255e-05, 'time_algorithm_update': 0.0018073581092450605, 'loss': 2.4683032830800692e-11, 'time_step': 0.002156837259697663, 'td_error': 2.3449955439315744e-14, 'value_scale': -1.2059810093507945e-05, 'environment': -0.04306546917106506} step=10164267
2023-01-10 07:04.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10164267.pt


Epoch 424/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:05.15 [info     ] DQN_20230110005015: epoch=424 step=10188296 epoch=424 metrics={'time_sample_batch': 6.140380263095383e-05, 'time_algorithm_update': 0.0018290958133071861, 'loss': 2.5645828712606865e-11, 'time_step': 0.002186444791060142, 'td_error': 4.606655460156227e-13, 'value_scale': 5.355256281146816e-05, 'environment': -0.047181976779905134} step=10188296
2023-01-10 07:05.15 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10188296.pt


Epoch 425/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:06.10 [info     ] DQN_20230110005015: epoch=425 step=10212325 epoch=425 metrics={'time_sample_batch': 6.155004473163958e-05, 'time_algorithm_update': 0.0018380870483327317, 'loss': 2.6086626657264827e-11, 'time_step': 0.002191000482037777, 'td_error': 9.669850868974659e-14, 'value_scale': 2.4521708124271386e-05, 'environment': -0.1552757225327271} step=10212325
2023-01-10 07:06.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10212325.pt


Epoch 426/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:07.03 [info     ] DQN_20230110005015: epoch=426 step=10236354 epoch=426 metrics={'time_sample_batch': 6.0257073660022664e-05, 'time_algorithm_update': 0.0017990808301593308, 'loss': 2.5350778408468887e-11, 'time_step': 0.0021524504828608843, 'td_error': 2.726657721085863e-13, 'value_scale': -4.118336572700604e-05, 'environment': -0.13969928630769055} step=10236354
2023-01-10 07:07.03 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10236354.pt


Epoch 427/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:07.56 [info     ] DQN_20230110005015: epoch=427 step=10260383 epoch=427 metrics={'time_sample_batch': 5.96852421418246e-05, 'time_algorithm_update': 0.0017724338306698629, 'loss': 2.5741650946326667e-11, 'time_step': 0.002121196147768361, 'td_error': 7.632595710925164e-13, 'value_scale': -6.89299403618057e-05, 'environment': -0.3190452594877422} step=10260383
2023-01-10 07:07.56 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10260383.pt


Epoch 428/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:08.51 [info     ] DQN_20230110005015: epoch=428 step=10284412 epoch=428 metrics={'time_sample_batch': 6.164026655276283e-05, 'time_algorithm_update': 0.0018271376634290033, 'loss': 2.4214983411311056e-11, 'time_step': 0.0021777270294574273, 'td_error': 1.3188688154172365e-12, 'value_scale': -9.061907003823824e-05, 'environment': -0.15767167267471277} step=10284412
2023-01-10 07:08.51 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10284412.pt


Epoch 429/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:09.45 [info     ] DQN_20230110005015: epoch=429 step=10308441 epoch=429 metrics={'time_sample_batch': 6.015189920671722e-05, 'time_algorithm_update': 0.0018109538948160367, 'loss': 2.5081088794161156e-11, 'time_step': 0.0021630285523338527, 'td_error': 1.3953215844912842e-15, 'value_scale': -2.783288648130324e-06, 'environment': -0.475048754949474} step=10308441
2023-01-10 07:09.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10308441.pt


Epoch 430/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:10.38 [info     ] DQN_20230110005015: epoch=430 step=10332470 epoch=430 metrics={'time_sample_batch': 6.019871176055638e-05, 'time_algorithm_update': 0.0017819780352668392, 'loss': 2.5632072894602915e-11, 'time_step': 0.002130809122566834, 'td_error': 2.3987205827129246e-13, 'value_scale': -3.8635831433024674e-05, 'environment': -0.2883189130318463} step=10332470
2023-01-10 07:10.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10332470.pt


Epoch 431/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:11.32 [info     ] DQN_20230110005015: epoch=431 step=10356499 epoch=431 metrics={'time_sample_batch': 6.221604707401896e-05, 'time_algorithm_update': 0.0018318620204945382, 'loss': 2.5267058615217867e-11, 'time_step': 0.0021891492628278277, 'td_error': 1.1689346854277e-13, 'value_scale': 2.6949465740466913e-05, 'environment': -0.11665393751042119} step=10356499
2023-01-10 07:11.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10356499.pt


Epoch 432/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:12.26 [info     ] DQN_20230110005015: epoch=432 step=10380528 epoch=432 metrics={'time_sample_batch': 6.008489514226706e-05, 'time_algorithm_update': 0.0017994000643914675, 'loss': 2.4994799462553098e-11, 'time_step': 0.0021489853616649627, 'td_error': 1.047381594222529e-13, 'value_scale': -2.5507309994861657e-05, 'environment': -0.23903428705366087} step=10380528
2023-01-10 07:12.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10380528.pt


Epoch 433/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:13.20 [info     ] DQN_20230110005015: epoch=433 step=10404557 epoch=433 metrics={'time_sample_batch': 6.037781591684096e-05, 'time_algorithm_update': 0.0018151505936337617, 'loss': 2.4435121659697705e-11, 'time_step': 0.0021683827059323225, 'td_error': 4.499528828506303e-15, 'value_scale': 5.167348430358097e-06, 'environment': -0.11798621399752054} step=10404557
2023-01-10 07:13.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10404557.pt


Epoch 434/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:14.14 [info     ] DQN_20230110005015: epoch=434 step=10428586 epoch=434 metrics={'time_sample_batch': 6.1234511449605e-05, 'time_algorithm_update': 0.001831909487908181, 'loss': 2.5472305787594973e-11, 'time_step': 0.0021861023886819235, 'td_error': 1.787022344351165e-13, 'value_scale': -3.3347776630864854e-05, 'environment': -0.3328526490596372} step=10428586
2023-01-10 07:14.14 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10428586.pt


Epoch 435/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:15.08 [info     ] DQN_20230110005015: epoch=435 step=10452615 epoch=435 metrics={'time_sample_batch': 5.9915395596435274e-05, 'time_algorithm_update': 0.0017820223672911193, 'loss': 2.5647434027796844e-11, 'time_step': 0.002132570199332606, 'td_error': 4.656047831530136e-13, 'value_scale': -5.383512053139768e-05, 'environment': -0.34450022105952816} step=10452615
2023-01-10 07:15.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10452615.pt


Epoch 436/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:16.02 [info     ] DQN_20230110005015: epoch=436 step=10476644 epoch=436 metrics={'time_sample_batch': 6.060943784495062e-05, 'time_algorithm_update': 0.0018187802930048696, 'loss': 2.5359656808943877e-11, 'time_step': 0.0021703426318696695, 'td_error': 2.6508810946854865e-14, 'value_scale': 1.2816636485606393e-05, 'environment': -0.10903870706316912} step=10476644
2023-01-10 07:16.02 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10476644.pt


Epoch 437/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:16.55 [info     ] DQN_20230110005015: epoch=437 step=10500673 epoch=437 metrics={'time_sample_batch': 6.010324113888609e-05, 'time_algorithm_update': 0.0017776376641770518, 'loss': 2.49322564710047e-11, 'time_step': 0.0021253189367273434, 'td_error': 4.18965284017269e-13, 'value_scale': 5.107242290134081e-05, 'environment': -0.4704723431005706} step=10500673
2023-01-10 07:16.55 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10500673.pt


Epoch 438/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:17.48 [info     ] DQN_20230110005015: epoch=438 step=10524702 epoch=438 metrics={'time_sample_batch': 6.0791310272223144e-05, 'time_algorithm_update': 0.001793293119682405, 'loss': 2.5364654821278102e-11, 'time_step': 0.0021456909306690614, 'td_error': 2.4086238921105143e-13, 'value_scale': 3.872043191998037e-05, 'environment': -0.07752958534193426} step=10524702
2023-01-10 07:17.48 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10524702.pt


Epoch 439/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:18.42 [info     ] DQN_20230110005015: epoch=439 step=10548731 epoch=439 metrics={'time_sample_batch': 6.065746089721461e-05, 'time_algorithm_update': 0.0018213337501329977, 'loss': 2.4367972875662248e-11, 'time_step': 0.0021743951226209472, 'td_error': 3.5074033928082134e-14, 'value_scale': -1.4749736924932738e-05, 'environment': -0.13621495712888376} step=10548731
2023-01-10 07:18.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10548731.pt


Epoch 440/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:19.38 [info     ] DQN_20230110005015: epoch=440 step=10572760 epoch=440 metrics={'time_sample_batch': 6.181864639441615e-05, 'time_algorithm_update': 0.0018545835720346266, 'loss': 2.6041443747898285e-11, 'time_step': 0.00221030953929707, 'td_error': 1.161593937568164e-12, 'value_scale': 8.503443066827863e-05, 'environment': 0.0418951714199602} step=10572760
2023-01-10 07:19.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10572760.pt


Epoch 441/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:20.32 [info     ] DQN_20230110005015: epoch=441 step=10596789 epoch=441 metrics={'time_sample_batch': 6.0965711344463697e-05, 'time_algorithm_update': 0.0018249871038339813, 'loss': 2.419576102760536e-11, 'time_step': 0.002176930561182358, 'td_error': 6.664872294907454e-13, 'value_scale': 6.439962278511736e-05, 'environment': 0.033075727939701115} step=10596789
2023-01-10 07:20.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10596789.pt


Epoch 442/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:21.26 [info     ] DQN_20230110005015: epoch=442 step=10620818 epoch=442 metrics={'time_sample_batch': 6.0738633746506296e-05, 'time_algorithm_update': 0.0018249997545347325, 'loss': 2.5448816966799855e-11, 'time_step': 0.002178210861708987, 'td_error': 1.8926741344443844e-13, 'value_scale': -3.4285517718439775e-05, 'environment': -0.1635437351056777} step=10620818
2023-01-10 07:21.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10620818.pt


Epoch 443/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:22.19 [info     ] DQN_20230110005015: epoch=443 step=10644847 epoch=443 metrics={'time_sample_batch': 5.962523317073108e-05, 'time_algorithm_update': 0.0017826478576247395, 'loss': 2.5528536551111958e-11, 'time_step': 0.0021341289641075385, 'td_error': 2.879408570692667e-13, 'value_scale': 4.230490141673269e-05, 'environment': -0.3181216213066605} step=10644847
2023-01-10 07:22.19 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10644847.pt


Epoch 444/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:23.14 [info     ] DQN_20230110005015: epoch=444 step=10668876 epoch=444 metrics={'time_sample_batch': 6.12519446113463e-05, 'time_algorithm_update': 0.001830215246530849, 'loss': 2.616008533301386e-11, 'time_step': 0.002185784553468458, 'td_error': 8.155673243452959e-13, 'value_scale': 7.124544846932933e-05, 'environment': -0.26544896689806513} step=10668876
2023-01-10 07:23.14 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10668876.pt


Epoch 445/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:24.08 [info     ] DQN_20230110005015: epoch=445 step=10692905 epoch=445 metrics={'time_sample_batch': 6.140415982721033e-05, 'time_algorithm_update': 0.0018018785896826745, 'loss': 2.572793617471604e-11, 'time_step': 0.0021549217947721333, 'td_error': 3.425177221066276e-14, 'value_scale': 1.4509507356354018e-05, 'environment': -0.17399696217374674} step=10692905
2023-01-10 07:24.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10692905.pt


Epoch 446/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:25.02 [info     ] DQN_20230110005015: epoch=446 step=10716934 epoch=446 metrics={'time_sample_batch': 6.144927569883108e-05, 'time_algorithm_update': 0.0018358525873059027, 'loss': 2.572367693866539e-11, 'time_step': 0.0021922018521138356, 'td_error': 1.080877996227235e-12, 'value_scale': -8.202672843081566e-05, 'environment': 0.012662559859490016} step=10716934
2023-01-10 07:25.02 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10716934.pt


Epoch 447/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:25.56 [info     ] DQN_20230110005015: epoch=447 step=10740963 epoch=447 metrics={'time_sample_batch': 5.9878257107876566e-05, 'time_algorithm_update': 0.0017981600675091147, 'loss': 2.5567526687433148e-11, 'time_step': 0.0021463427939929537, 'td_error': 1.14568767432632e-13, 'value_scale': -2.6658322825019735e-05, 'environment': 0.08239895121359268} step=10740963
2023-01-10 07:25.56 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10740963.pt


Epoch 448/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:26.51 [info     ] DQN_20230110005015: epoch=448 step=10764992 epoch=448 metrics={'time_sample_batch': 6.114451783720119e-05, 'time_algorithm_update': 0.0018350832064136186, 'loss': 2.6154503646020526e-11, 'time_step': 0.0021926255364846468, 'td_error': 2.575742909611791e-13, 'value_scale': 4.002249426787235e-05, 'environment': 0.08750616947417095} step=10764992
2023-01-10 07:26.51 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10764992.pt


Epoch 449/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:27.45 [info     ] DQN_20230110005015: epoch=449 step=10789021 epoch=449 metrics={'time_sample_batch': 6.099396953720086e-05, 'time_algorithm_update': 0.001812503412176773, 'loss': 2.508658048624683e-11, 'time_step': 0.00216526352931083, 'td_error': 8.498555700965026e-16, 'value_scale': 1.9474660523223956e-06, 'environment': -0.3632949297827735} step=10789021
2023-01-10 07:27.45 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10789021.pt


Epoch 450/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:28.38 [info     ] DQN_20230110005015: epoch=450 step=10813050 epoch=450 metrics={'time_sample_batch': 6.119714475232686e-05, 'time_algorithm_update': 0.001796291425059542, 'loss': 2.5340729678570252e-11, 'time_step': 0.0021477192993780026, 'td_error': 3.5080446280035333e-13, 'value_scale': 4.672702082430835e-05, 'environment': -0.36415099046617094} step=10813050
2023-01-10 07:28.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10813050.pt


Epoch 451/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:29.33 [info     ] DQN_20230110005015: epoch=451 step=10837079 epoch=451 metrics={'time_sample_batch': 6.13413925072471e-05, 'time_algorithm_update': 0.0018365913485413094, 'loss': 2.5779995958438218e-11, 'time_step': 0.0021928377408273678, 'td_error': 7.631732130437195e-13, 'value_scale': 6.893088929682188e-05, 'environment': -0.29089904348966844} step=10837079
2023-01-10 07:29.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10837079.pt


Epoch 452/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:30.28 [info     ] DQN_20230110005015: epoch=452 step=10861108 epoch=452 metrics={'time_sample_batch': 6.138576422000012e-05, 'time_algorithm_update': 0.0018247255171087978, 'loss': 2.5447219722992e-11, 'time_step': 0.0021814436168179313, 'td_error': 1.142508404963286e-12, 'value_scale': 8.434209701852658e-05, 'environment': 0.02009708201321645} step=10861108
2023-01-10 07:30.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10861108.pt


Epoch 453/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:31.22 [info     ] DQN_20230110005015: epoch=453 step=10885137 epoch=453 metrics={'time_sample_batch': 6.13863595470943e-05, 'time_algorithm_update': 0.0018280407845529954, 'loss': 2.4976340576615767e-11, 'time_step': 0.0021852168495514464, 'td_error': 5.844833987937122e-13, 'value_scale': 6.032075461063251e-05, 'environment': -0.08087176687305726} step=10885137
2023-01-10 07:31.22 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10885137.pt


Epoch 454/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:32.16 [info     ] DQN_20230110005015: epoch=454 step=10909166 epoch=454 metrics={'time_sample_batch': 6.096976949082237e-05, 'time_algorithm_update': 0.001821165728982783, 'loss': 2.5411239236974583e-11, 'time_step': 0.0021741551561914006, 'td_error': 9.781770885582701e-14, 'value_scale': -2.465335314583057e-05, 'environment': -0.12874953155568947} step=10909166
2023-01-10 07:32.16 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10909166.pt


Epoch 455/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:33.11 [info     ] DQN_20230110005015: epoch=455 step=10933195 epoch=455 metrics={'time_sample_batch': 6.139165795823252e-05, 'time_algorithm_update': 0.0018396817510199163, 'loss': 2.6636308000131828e-11, 'time_step': 0.002196914441547142, 'td_error': 4.218773216516413e-13, 'value_scale': 5.123398684359648e-05, 'environment': -0.13618266347101818} step=10933195
2023-01-10 07:33.11 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10933195.pt


Epoch 456/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:34.06 [info     ] DQN_20230110005015: epoch=456 step=10957224 epoch=456 metrics={'time_sample_batch': 6.17231559285094e-05, 'time_algorithm_update': 0.001845387891762821, 'loss': 2.5215309863399993e-11, 'time_step': 0.00220067500373567, 'td_error': 9.979048193120916e-13, 'value_scale': -7.881461270485363e-05, 'environment': 0.13483159662755945} step=10957224
2023-01-10 07:34.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10957224.pt


Epoch 457/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:35.01 [info     ] DQN_20230110005015: epoch=457 step=10981253 epoch=457 metrics={'time_sample_batch': 6.11602642388423e-05, 'time_algorithm_update': 0.001828203894254683, 'loss': 2.476664442046556e-11, 'time_step': 0.0021821272606865347, 'td_error': 1.0217043355629841e-13, 'value_scale': -2.5194556432828145e-05, 'environment': 0.06509825126894594} step=10981253
2023-01-10 07:35.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_10981253.pt


Epoch 458/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:35.55 [info     ] DQN_20230110005015: epoch=458 step=11005282 epoch=458 metrics={'time_sample_batch': 6.193614411857116e-05, 'time_algorithm_update': 0.0018213682493381056, 'loss': 2.518615609233712e-11, 'time_step': 0.0021745709326339772, 'td_error': 3.1911963517521066e-13, 'value_scale': -4.456445952454777e-05, 'environment': 0.08586695586438672} step=11005282
2023-01-10 07:35.55 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11005282.pt


Epoch 459/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:36.49 [info     ] DQN_20230110005015: epoch=459 step=11029311 epoch=459 metrics={'time_sample_batch': 6.0863612747811525e-05, 'time_algorithm_update': 0.0017944609232324702, 'loss': 2.5782006271652532e-11, 'time_step': 0.002146140253793395, 'td_error': 5.233327165339429e-13, 'value_scale': -5.7071467355339795e-05, 'environment': 0.09976898500662654} step=11029311
2023-01-10 07:36.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11029311.pt


Epoch 460/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:37.43 [info     ] DQN_20230110005015: epoch=460 step=11053340 epoch=460 metrics={'time_sample_batch': 6.120278051548512e-05, 'time_algorithm_update': 0.0018166889585336003, 'loss': 2.395838980513237e-11, 'time_step': 0.002169023932745466, 'td_error': 2.0910059791526417e-16, 'value_scale': 1.3112087596043853e-07, 'environment': 0.053960778163840886} step=11053340
2023-01-10 07:37.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11053340.pt


Epoch 461/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:38.36 [info     ] DQN_20230110005015: epoch=461 step=11077369 epoch=461 metrics={'time_sample_batch': 6.063784486946132e-05, 'time_algorithm_update': 0.0017849170460653959, 'loss': 2.5424166353721118e-11, 'time_step': 0.002136210168096088, 'td_error': 8.81199391987938e-16, 'value_scale': 1.985290355667234e-06, 'environment': -0.12600277156818085} step=11077369
2023-01-10 07:38.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11077369.pt


Epoch 462/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:39.30 [info     ] DQN_20230110005015: epoch=462 step=11101398 epoch=462 metrics={'time_sample_batch': 6.121880473643684e-05, 'time_algorithm_update': 0.0018178336038597019, 'loss': 2.54697918304915e-11, 'time_step': 0.0021700111338993928, 'td_error': 1.1236909723758562e-14, 'value_scale': 8.29891915278349e-06, 'environment': -0.3906561440293742} step=11101398
2023-01-10 07:39.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11101398.pt


Epoch 463/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:40.24 [info     ] DQN_20230110005015: epoch=463 step=11125427 epoch=463 metrics={'time_sample_batch': 6.134850666602257e-05, 'time_algorithm_update': 0.0018210589967569146, 'loss': 2.6616408127900736e-11, 'time_step': 0.0021757599298285954, 'td_error': 1.0460265673129386e-14, 'value_scale': -8.007523085307204e-06, 'environment': -0.1806603219682466} step=11125427
2023-01-10 07:40.24 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11125427.pt


Epoch 464/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:41.19 [info     ] DQN_20230110005015: epoch=464 step=11149456 epoch=464 metrics={'time_sample_batch': 6.052688582122408e-05, 'time_algorithm_update': 0.0018247114971557297, 'loss': 2.6094580335743384e-11, 'time_step': 0.0021770750669123525, 'td_error': 1.721093692268146e-12, 'value_scale': -0.00010351913521711093, 'environment': 0.3157543511794415} step=11149456
2023-01-10 07:41.19 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11149456.pt


Epoch 465/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:42.13 [info     ] DQN_20230110005015: epoch=465 step=11173485 epoch=465 metrics={'time_sample_batch': 6.12569155925827e-05, 'time_algorithm_update': 0.0018246043878893683, 'loss': 2.5249751962860914e-11, 'time_step': 0.0021822428533639885, 'td_error': 3.553880862047828e-13, 'value_scale': -4.702787629706736e-05, 'environment': -0.20303288856051793} step=11173485
2023-01-10 07:42.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11173485.pt


Epoch 466/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:43.08 [info     ] DQN_20230110005015: epoch=466 step=11197514 epoch=466 metrics={'time_sample_batch': 6.186753267096671e-05, 'time_algorithm_update': 0.001843672456740936, 'loss': 2.5576427256087417e-11, 'time_step': 0.002200240494333863, 'td_error': 8.598567246798127e-15, 'value_scale': 7.204329612361974e-06, 'environment': -0.15829359182452962} step=11197514
2023-01-10 07:43.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11197514.pt


Epoch 467/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:44.01 [info     ] DQN_20230110005015: epoch=467 step=11221543 epoch=467 metrics={'time_sample_batch': 6.0504332846472825e-05, 'time_algorithm_update': 0.0017770843870199558, 'loss': 2.5126638831052373e-11, 'time_step': 0.00212930842202782, 'td_error': 5.942981172939149e-14, 'value_scale': 1.9178836074553826e-05, 'environment': -0.16941132429087444} step=11221543
2023-01-10 07:44.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11221543.pt


Epoch 468/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:44.56 [info     ] DQN_20230110005015: epoch=468 step=11245572 epoch=468 metrics={'time_sample_batch': 6.201261388381881e-05, 'time_algorithm_update': 0.0018415299142174489, 'loss': 2.5715081611936646e-11, 'time_step': 0.0021952462754965354, 'td_error': 1.0591188954525178e-13, 'value_scale': -2.5638384821438398e-05, 'environment': -0.2987156632136044} step=11245572
2023-01-10 07:44.56 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11245572.pt


Epoch 469/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:45.49 [info     ] DQN_20230110005015: epoch=469 step=11269601 epoch=469 metrics={'time_sample_batch': 6.011864026638893e-05, 'time_algorithm_update': 0.0017757403170394215, 'loss': 2.4973607840290417e-11, 'time_step': 0.002125508141599993, 'td_error': 5.276982132966705e-13, 'value_scale': -5.730868196246427e-05, 'environment': -0.19031568863995021} step=11269601
2023-01-10 07:45.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11269601.pt


Epoch 470/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:46.42 [info     ] DQN_20230110005015: epoch=470 step=11293630 epoch=470 metrics={'time_sample_batch': 5.971054354332733e-05, 'time_algorithm_update': 0.0017894313320436302, 'loss': 2.5211734831445564e-11, 'time_step': 0.00213971281474318, 'td_error': 4.943670577614116e-13, 'value_scale': -5.5473198444600585e-05, 'environment': -0.3044190370137499} step=11293630
2023-01-10 07:46.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11293630.pt


Epoch 471/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:47.36 [info     ] DQN_20230110005015: epoch=471 step=11317659 epoch=471 metrics={'time_sample_batch': 6.035481644676907e-05, 'time_algorithm_update': 0.0017816834773427563, 'loss': 2.45029391023453e-11, 'time_step': 0.002131354561250523, 'td_error': 6.595832099821518e-13, 'value_scale': -6.407402948424534e-05, 'environment': -0.2266745165156645} step=11317659
2023-01-10 07:47.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11317659.pt


Epoch 472/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:48.30 [info     ] DQN_20230110005015: epoch=472 step=11341688 epoch=472 metrics={'time_sample_batch': 6.0881938900194085e-05, 'time_algorithm_update': 0.0018193911382919732, 'loss': 2.4375918063756303e-11, 'time_step': 0.002174286852466752, 'td_error': 3.741128273142994e-14, 'value_scale': 1.5227292160257082e-05, 'environment': 0.15997786039989265} step=11341688
2023-01-10 07:48.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11341688.pt


Epoch 473/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:49.25 [info     ] DQN_20230110005015: epoch=473 step=11365717 epoch=473 metrics={'time_sample_batch': 6.172319561698235e-05, 'time_algorithm_update': 0.0018467417350298179, 'loss': 2.6154982491411476e-11, 'time_step': 0.002203441111701839, 'td_error': 3.973877050691756e-13, 'value_scale': 4.9739094770555816e-05, 'environment': -0.05949797852774926} step=11365717
2023-01-10 07:49.25 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11365717.pt


Epoch 474/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:50.20 [info     ] DQN_20230110005015: epoch=474 step=11389746 epoch=474 metrics={'time_sample_batch': 6.190014667360963e-05, 'time_algorithm_update': 0.0018361273009935128, 'loss': 2.5466141434640385e-11, 'time_step': 0.0021929349378976113, 'td_error': 1.2913679336125058e-12, 'value_scale': 8.967089895309464e-05, 'environment': 0.00840431250809701} step=11389746
2023-01-10 07:50.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11389746.pt


Epoch 475/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:51.13 [info     ] DQN_20230110005015: epoch=475 step=11413775 epoch=475 metrics={'time_sample_batch': 5.950888641229149e-05, 'time_algorithm_update': 0.0017781471153378978, 'loss': 2.4630201118800296e-11, 'time_step': 0.002130328693601829, 'td_error': 8.21859015397025e-16, 'value_scale': -2.1276284137273377e-06, 'environment': -0.2016914687276648} step=11413775
2023-01-10 07:51.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11413775.pt


Epoch 476/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:52.07 [info     ] DQN_20230110005015: epoch=476 step=11437804 epoch=476 metrics={'time_sample_batch': 6.171085250189631e-05, 'time_algorithm_update': 0.0018265773018794865, 'loss': 2.5535108174197102e-11, 'time_step': 0.0021815807504140757, 'td_error': 4.443908725191148e-14, 'value_scale': 1.6608895489930947e-05, 'environment': -0.2917491451952456} step=11437804
2023-01-10 07:52.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11437804.pt


Epoch 477/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:53.02 [info     ] DQN_20230110005015: epoch=477 step=11461833 epoch=477 metrics={'time_sample_batch': 6.153713605581407e-05, 'time_algorithm_update': 0.0018430077442739275, 'loss': 2.4354855709374558e-11, 'time_step': 0.002202713324329202, 'td_error': 2.055295555985994e-13, 'value_scale': -3.57612134727179e-05, 'environment': -0.15448461423058873} step=11461833
2023-01-10 07:53.02 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11461833.pt


Epoch 478/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:53.57 [info     ] DQN_20230110005015: epoch=478 step=11485862 epoch=478 metrics={'time_sample_batch': 6.196767661032632e-05, 'time_algorithm_update': 0.0018535529417691792, 'loss': 2.5729807372006877e-11, 'time_step': 0.002215032239368859, 'td_error': 4.60782417898379e-13, 'value_scale': -5.3554028321257356e-05, 'environment': -0.19212741044864234} step=11485862
2023-01-10 07:53.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11485862.pt


Epoch 479/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:54.52 [info     ] DQN_20230110005015: epoch=479 step=11509891 epoch=479 metrics={'time_sample_batch': 6.04487987507039e-05, 'time_algorithm_update': 0.0018307359592958934, 'loss': 2.473760923722438e-11, 'time_step': 0.0021848580161454283, 'td_error': 9.671766483414496e-16, 'value_scale': -2.215030571356861e-06, 'environment': -0.45088026010542165} step=11509891
2023-01-10 07:54.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11509891.pt


Epoch 480/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:55.46 [info     ] DQN_20230110005015: epoch=480 step=11533920 epoch=480 metrics={'time_sample_batch': 6.131497982850191e-05, 'time_algorithm_update': 0.001814150017464452, 'loss': 2.5222541943759194e-11, 'time_step': 0.002168371265729996, 'td_error': 5.083658841976851e-13, 'value_scale': 5.6249474433187435e-05, 'environment': 0.25120939884085497} step=11533920
2023-01-10 07:55.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11533920.pt


Epoch 481/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:56.40 [info     ] DQN_20230110005015: epoch=481 step=11557949 epoch=481 metrics={'time_sample_batch': 6.143940319118589e-05, 'time_algorithm_update': 0.0018222769566146645, 'loss': 2.6253479495673327e-11, 'time_step': 0.0021793126434843076, 'td_error': 1.364089185911593e-12, 'value_scale': -9.215288994907176e-05, 'environment': 0.02140359981354554} step=11557949
2023-01-10 07:56.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11557949.pt


Epoch 482/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:57.34 [info     ] DQN_20230110005015: epoch=482 step=11581978 epoch=482 metrics={'time_sample_batch': 6.0752246892726583e-05, 'time_algorithm_update': 0.001799522116367893, 'loss': 2.38220269886708e-11, 'time_step': 0.0021476292164665347, 'td_error': 3.3551593414736104e-13, 'value_scale': 4.5695972859627197e-05, 'environment': 0.24822253736712438} step=11581978
2023-01-10 07:57.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11581978.pt


Epoch 483/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:58.27 [info     ] DQN_20230110005015: epoch=483 step=11606007 epoch=483 metrics={'time_sample_batch': 5.9966593726534904e-05, 'time_algorithm_update': 0.0017812371606202482, 'loss': 2.634840202206012e-11, 'time_step': 0.002131719516603493, 'td_error': 3.86741975361998e-12, 'value_scale': 0.00015518202662814567, 'environment': -0.19269620070440396} step=11606007
2023-01-10 07:58.27 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11606007.pt


Epoch 484/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 07:59.21 [info     ] DQN_20230110005015: epoch=484 step=11630036 epoch=484 metrics={'time_sample_batch': 6.154091638286213e-05, 'time_algorithm_update': 0.0018269489645843843, 'loss': 2.6005464001289695e-11, 'time_step': 0.002184362961978143, 'td_error': 8.39310578188939e-15, 'value_scale': -7.1750914897956274e-06, 'environment': -0.2774526471652892} step=11630036
2023-01-10 07:59.21 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11630036.pt


Epoch 485/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:00.14 [info     ] DQN_20230110005015: epoch=485 step=11654065 epoch=485 metrics={'time_sample_batch': 6.0673356130629266e-05, 'time_algorithm_update': 0.0017797398336014353, 'loss': 2.452339941797707e-11, 'time_step': 0.0021308597551361937, 'td_error': 1.8954885031325793e-13, 'value_scale': 3.434360862825747e-05, 'environment': -0.02927750140913381} step=11654065
2023-01-10 08:00.14 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11654065.pt


Epoch 486/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:01.09 [info     ] DQN_20230110005015: epoch=486 step=11678094 epoch=486 metrics={'time_sample_batch': 6.097260721663797e-05, 'time_algorithm_update': 0.001826989506359498, 'loss': 2.511505783743674e-11, 'time_step': 0.0021865238901867226, 'td_error': 1.80724514606626e-13, 'value_scale': 3.353718977235794e-05, 'environment': -0.39894459479145816} step=11678094
2023-01-10 08:01.09 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11678094.pt


Epoch 487/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:02.04 [info     ] DQN_20230110005015: epoch=487 step=11702123 epoch=487 metrics={'time_sample_batch': 6.145310563647031e-05, 'time_algorithm_update': 0.0018318993673475798, 'loss': 2.6298796833101564e-11, 'time_step': 0.0021894639626519304, 'td_error': 1.1146948977887764e-12, 'value_scale': 8.330950325881373e-05, 'environment': -0.19917849713394475} step=11702123
2023-01-10 08:02.04 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11702123.pt


Epoch 488/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:02.58 [info     ] DQN_20230110005015: epoch=488 step=11726152 epoch=488 metrics={'time_sample_batch': 6.136557270938912e-05, 'time_algorithm_update': 0.0018355318250675575, 'loss': 2.5433399409096296e-11, 'time_step': 0.0021906028829158093, 'td_error': 5.221185302541326e-13, 'value_scale': 5.701141871635634e-05, 'environment': -0.37619864389303304} step=11726152
2023-01-10 08:02.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11726152.pt


Epoch 489/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:03.52 [info     ] DQN_20230110005015: epoch=489 step=11750181 epoch=489 metrics={'time_sample_batch': 6.053017004236031e-05, 'time_algorithm_update': 0.0017817638465004709, 'loss': 2.5239383113666472e-11, 'time_step': 0.0021343612012069786, 'td_error': 1.0871842421978013e-12, 'value_scale': 8.227497758828249e-05, 'environment': -0.23780938977700602} step=11750181
2023-01-10 08:03.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11750181.pt


Epoch 490/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:04.47 [info     ] DQN_20230110005015: epoch=490 step=11774210 epoch=490 metrics={'time_sample_batch': 6.075489609829569e-05, 'time_algorithm_update': 0.0018431921468413503, 'loss': 2.616543696869494e-11, 'time_step': 0.002197888892779135, 'td_error': 1.6897264191927948e-16, 'value_scale': -2.996395807927293e-08, 'environment': -0.0019769542130019914} step=11774210
2023-01-10 08:04.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11774210.pt


Epoch 491/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:05.40 [info     ] DQN_20230110005015: epoch=491 step=11798239 epoch=491 metrics={'time_sample_batch': 6.048118454462739e-05, 'time_algorithm_update': 0.0017879662717533219, 'loss': 2.4432369441210498e-11, 'time_step': 0.002138981098211721, 'td_error': 1.1766533875916443e-13, 'value_scale': -2.7050868155807202e-05, 'environment': -0.06970508710676149} step=11798239
2023-01-10 08:05.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11798239.pt


Epoch 492/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:06.34 [info     ] DQN_20230110005015: epoch=492 step=11822268 epoch=492 metrics={'time_sample_batch': 6.139040777133473e-05, 'time_algorithm_update': 0.001821030927084424, 'loss': 2.4996246552358173e-11, 'time_step': 0.002180102364718976, 'td_error': 3.38132771566109e-14, 'value_scale': 1.4484704056348983e-05, 'environment': -0.36063573399396237} step=11822268
2023-01-10 08:06.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11822268.pt


Epoch 493/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:07.28 [info     ] DQN_20230110005015: epoch=493 step=11846297 epoch=493 metrics={'time_sample_batch': 6.105284738681543e-05, 'time_algorithm_update': 0.0018008427503051528, 'loss': 2.471909523621098e-11, 'time_step': 0.0021518233454556365, 'td_error': 8.644362834492283e-13, 'value_scale': 7.336169547654802e-05, 'environment': -0.1754968919101217} step=11846297
2023-01-10 08:07.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11846297.pt


Epoch 494/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:08.22 [info     ] DQN_20230110005015: epoch=494 step=11870326 epoch=494 metrics={'time_sample_batch': 6.096695160924324e-05, 'time_algorithm_update': 0.0018281035121844856, 'loss': 2.6540122414523934e-11, 'time_step': 0.0021831926977427555, 'td_error': 1.2908870757862376e-13, 'value_scale': -2.8320735229927623e-05, 'environment': -0.5025460475889753} step=11870326
2023-01-10 08:08.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11870326.pt


Epoch 495/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:09.17 [info     ] DQN_20230110005015: epoch=495 step=11894355 epoch=495 metrics={'time_sample_batch': 6.203281531654804e-05, 'time_algorithm_update': 0.0018415502446377152, 'loss': 2.5861867781991794e-11, 'time_step': 0.002201882446148089, 'td_error': 5.047834276377495e-14, 'value_scale': 1.7686549297385883e-05, 'environment': -0.03557313865300543} step=11894355
2023-01-10 08:09.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11894355.pt


Epoch 496/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:10.13 [info     ] DQN_20230110005015: epoch=496 step=11918384 epoch=496 metrics={'time_sample_batch': 6.227430975230288e-05, 'time_algorithm_update': 0.0018455020556552486, 'loss': 2.4903032008981257e-11, 'time_step': 0.0022031651478273315, 'td_error': 1.3455316062697077e-13, 'value_scale': -2.892152109903004e-05, 'environment': -0.26157205830320207} step=11918384
2023-01-10 08:10.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11918384.pt


Epoch 497/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:11.07 [info     ] DQN_20230110005015: epoch=497 step=11942413 epoch=497 metrics={'time_sample_batch': 6.131544616805902e-05, 'time_algorithm_update': 0.0018095058211921489, 'loss': 2.409200008276622e-11, 'time_step': 0.0021625333691790306, 'td_error': 4.6804707646450495e-14, 'value_scale': 1.7042580031235106e-05, 'environment': 0.18487148345712093} step=11942413
2023-01-10 08:11.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11942413.pt


Epoch 498/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:12.01 [info     ] DQN_20230110005015: epoch=498 step=11966442 epoch=498 metrics={'time_sample_batch': 6.088989643901965e-05, 'time_algorithm_update': 0.001821916793644803, 'loss': 2.76486899939333e-11, 'time_step': 0.0021764306749434918, 'td_error': 4.2433612193074525e-13, 'value_scale': -5.138994286222642e-05, 'environment': 0.1649866454624162} step=11966442
2023-01-10 08:12.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11966442.pt


Epoch 499/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:12.55 [info     ] DQN_20230110005015: epoch=499 step=11990471 epoch=499 metrics={'time_sample_batch': 6.039825548040787e-05, 'time_algorithm_update': 0.0018003218390977439, 'loss': 2.445897383690693e-11, 'time_step': 0.0021511751632376096, 'td_error': 4.105070763546832e-15, 'value_scale': -4.995883957256764e-06, 'environment': -0.151548062326756} step=11990471
2023-01-10 08:12.55 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_11990471.pt


Epoch 500/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:13.49 [info     ] DQN_20230110005015: epoch=500 step=12014500 epoch=500 metrics={'time_sample_batch': 6.13610680677098e-05, 'time_algorithm_update': 0.0018195583160621376, 'loss': 2.4458777686094665e-11, 'time_step': 0.0021770269545610243, 'td_error': 3.396156028843184e-14, 'value_scale': -1.4523659811505427e-05, 'environment': -0.24602232010924796} step=12014500
2023-01-10 08:13.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12014500.pt


Epoch 501/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:14.44 [info     ] DQN_20230110005015: epoch=501 step=12038529 epoch=501 metrics={'time_sample_batch': 6.124975182321605e-05, 'time_algorithm_update': 0.0018418474716116037, 'loss': 2.5013152981634555e-11, 'time_step': 0.002199380167305824, 'td_error': 1.0007182287375198e-13, 'value_scale': 2.4954874898131924e-05, 'environment': -0.02247921107153198} step=12038529
2023-01-10 08:14.44 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12038529.pt


Epoch 502/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:15.38 [info     ] DQN_20230110005015: epoch=502 step=12062558 epoch=502 metrics={'time_sample_batch': 6.085138869814433e-05, 'time_algorithm_update': 0.0018093642920976255, 'loss': 2.6923793360393712e-11, 'time_step': 0.0021652807541080884, 'td_error': 1.0687725640039535e-13, 'value_scale': -2.577040861507354e-05, 'environment': -0.35438079578225967} step=12062558
2023-01-10 08:15.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12062558.pt


Epoch 503/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:16.32 [info     ] DQN_20230110005015: epoch=503 step=12086587 epoch=503 metrics={'time_sample_batch': 6.115807145071206e-05, 'time_algorithm_update': 0.0018317172171009968, 'loss': 2.4050960005828145e-11, 'time_step': 0.002187126857311946, 'td_error': 2.5847690207315966e-14, 'value_scale': 1.2609004918429027e-05, 'environment': -0.0038189119271323955} step=12086587
2023-01-10 08:16.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12086587.pt


Epoch 504/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:17.28 [info     ] DQN_20230110005015: epoch=504 step=12110616 epoch=504 metrics={'time_sample_batch': 6.219964581257425e-05, 'time_algorithm_update': 0.001864091928018415, 'loss': 2.5390358317594644e-11, 'time_step': 0.002227414715497938, 'td_error': 1.438848358050537e-13, 'value_scale': -2.9910108516517393e-05, 'environment': -0.00779991853598766} step=12110616
2023-01-10 08:17.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12110616.pt


Epoch 505/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:18.22 [info     ] DQN_20230110005015: epoch=505 step=12134645 epoch=505 metrics={'time_sample_batch': 6.200848628263248e-05, 'time_algorithm_update': 0.0018243763379238236, 'loss': 2.600863544202156e-11, 'time_step': 0.002181804801765971, 'td_error': 1.1179986460953963e-15, 'value_scale': -2.435229769815133e-06, 'environment': -0.7221605897834948} step=12134645
2023-01-10 08:18.22 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12134645.pt


Epoch 506/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:19.17 [info     ] DQN_20230110005015: epoch=506 step=12158674 epoch=506 metrics={'time_sample_batch': 6.18755497425017e-05, 'time_algorithm_update': 0.0018465545840356437, 'loss': 2.4100580255890075e-11, 'time_step': 0.0022097479077164187, 'td_error': 1.176067662770025e-14, 'value_scale': -8.477226236684653e-06, 'environment': -0.08123215766683378} step=12158674
2023-01-10 08:19.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12158674.pt


Epoch 507/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:20.13 [info     ] DQN_20230110005015: epoch=507 step=12182703 epoch=507 metrics={'time_sample_batch': 6.22267034290048e-05, 'time_algorithm_update': 0.0018597843098409257, 'loss': 2.537173791471332e-11, 'time_step': 0.00222469855547497, 'td_error': 5.271501931171801e-13, 'value_scale': -5.727099730599139e-05, 'environment': -0.2793403387493486} step=12182703
2023-01-10 08:20.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12182703.pt


Epoch 508/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:21.06 [info     ] DQN_20230110005015: epoch=508 step=12206732 epoch=508 metrics={'time_sample_batch': 5.9908539412733945e-05, 'time_algorithm_update': 0.001787718476772487, 'loss': 2.5471657459772973e-11, 'time_step': 0.0021402146953278115, 'td_error': 1.3220335097605143e-12, 'value_scale': -9.07243811197582e-05, 'environment': -0.11509851408301944} step=12206732
2023-01-10 08:21.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12206732.pt


Epoch 509/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:22.01 [info     ] DQN_20230110005015: epoch=509 step=12230761 epoch=509 metrics={'time_sample_batch': 6.164505893587099e-05, 'time_algorithm_update': 0.0018224266020019054, 'loss': 2.518795065680963e-11, 'time_step': 0.0021803794894813175, 'td_error': 1.2504003119705752e-14, 'value_scale': -8.738353675441754e-06, 'environment': -0.23669218723396762} step=12230761
2023-01-10 08:22.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12230761.pt


Epoch 510/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:22.54 [info     ] DQN_20230110005015: epoch=510 step=12254790 epoch=510 metrics={'time_sample_batch': 6.125959456450653e-05, 'time_algorithm_update': 0.0017957996948018663, 'loss': 2.6100394284402e-11, 'time_step': 0.0021517176947406557, 'td_error': 2.992084508244894e-13, 'value_scale': 4.3152099590323106e-05, 'environment': -0.3248289633816724} step=12254790
2023-01-10 08:22.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12254790.pt


Epoch 511/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:23.49 [info     ] DQN_20230110005015: epoch=511 step=12278819 epoch=511 metrics={'time_sample_batch': 6.0688120242564975e-05, 'time_algorithm_update': 0.0018296696689375043, 'loss': 2.517338802375769e-11, 'time_step': 0.0021877899127852093, 'td_error': 1.4797795685143098e-12, 'value_scale': 9.598609191000674e-05, 'environment': -0.20018305403831413} step=12278819
2023-01-10 08:23.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12278819.pt


Epoch 512/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:24.44 [info     ] DQN_20230110005015: epoch=512 step=12302848 epoch=512 metrics={'time_sample_batch': 6.191352168899226e-05, 'time_algorithm_update': 0.0018576302179718113, 'loss': 2.6228664384187312e-11, 'time_step': 0.0022192072484361193, 'td_error': 1.972771119968827e-14, 'value_scale': 1.1041514958362624e-05, 'environment': -0.4461062898607569} step=12302848
2023-01-10 08:24.44 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12302848.pt


Epoch 513/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:25.38 [info     ] DQN_20230110005015: epoch=513 step=12326877 epoch=513 metrics={'time_sample_batch': 5.993531920985389e-05, 'time_algorithm_update': 0.0017902953897881257, 'loss': 2.4431197988767178e-11, 'time_step': 0.0021435425340958145, 'td_error': 9.818145426599742e-15, 'value_scale': 7.757827123068879e-06, 'environment': -0.059878641585940716} step=12326877
2023-01-10 08:25.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12326877.pt


Epoch 514/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:26.33 [info     ] DQN_20230110005015: epoch=514 step=12350906 epoch=514 metrics={'time_sample_batch': 6.228122546871363e-05, 'time_algorithm_update': 0.0018485796189128575, 'loss': 2.5601703703262005e-11, 'time_step': 0.002212343722367298, 'td_error': 6.509027336106504e-14, 'value_scale': -2.0105737074883836e-05, 'environment': -0.33033082697506694} step=12350906
2023-01-10 08:26.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12350906.pt


Epoch 515/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:27.28 [info     ] DQN_20230110005015: epoch=515 step=12374935 epoch=515 metrics={'time_sample_batch': 6.121755454953905e-05, 'time_algorithm_update': 0.0018504793076703917, 'loss': 2.4461009513662684e-11, 'time_step': 0.0022084802777347775, 'td_error': 1.308827322032998e-14, 'value_scale': -8.983941347187419e-06, 'environment': -0.28521515967452765} step=12374935
2023-01-10 08:27.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12374935.pt


Epoch 516/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:28.23 [info     ] DQN_20230110005015: epoch=516 step=12398964 epoch=516 metrics={'time_sample_batch': 6.0442021943948464e-05, 'time_algorithm_update': 0.001811948874910661, 'loss': 2.620230436728567e-11, 'time_step': 0.002166755994491707, 'td_error': 4.4672835766279835e-13, 'value_scale': 5.2734129845012665e-05, 'environment': -0.25834674894934395} step=12398964
2023-01-10 08:28.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12398964.pt


Epoch 517/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:29.17 [info     ] DQN_20230110005015: epoch=517 step=12422993 epoch=517 metrics={'time_sample_batch': 6.142099766185744e-05, 'time_algorithm_update': 0.0018064964030425873, 'loss': 2.5739052608033722e-11, 'time_step': 0.0021644076473917614, 'td_error': 1.0617746431815412e-12, 'value_scale': -8.129795854555878e-05, 'environment': -0.3020066972116773} step=12422993
2023-01-10 08:29.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12422993.pt


Epoch 518/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:30.12 [info     ] DQN_20230110005015: epoch=518 step=12447022 epoch=518 metrics={'time_sample_batch': 6.185482243750594e-05, 'time_algorithm_update': 0.0018433742573994605, 'loss': 2.515860914735414e-11, 'time_step': 0.002203575169441331, 'td_error': 4.5749236067769614e-14, 'value_scale': 1.6826095470000393e-05, 'environment': -0.0898578387461367} step=12447022
2023-01-10 08:30.12 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12447022.pt


Epoch 519/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:31.07 [info     ] DQN_20230110005015: epoch=519 step=12471051 epoch=519 metrics={'time_sample_batch': 6.231342274239062e-05, 'time_algorithm_update': 0.0018490851607591186, 'loss': 2.5743339505153004e-11, 'time_step': 0.0022112363345951737, 'td_error': 6.408434161358877e-14, 'value_scale': -1.9952011411644675e-05, 'environment': -0.23734342829952443} step=12471051
2023-01-10 08:31.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12471051.pt


Epoch 520/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:32.01 [info     ] DQN_20230110005015: epoch=520 step=12495080 epoch=520 metrics={'time_sample_batch': 6.05655027054e-05, 'time_algorithm_update': 0.0017958429453152585, 'loss': 2.633900311948542e-11, 'time_step': 0.0021518683323397203, 'td_error': 9.882227651459371e-13, 'value_scale': -7.844046608938101e-05, 'environment': -0.16902209459135104} step=12495080
2023-01-10 08:32.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12495080.pt


Epoch 521/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:32.56 [info     ] DQN_20230110005015: epoch=521 step=12519109 epoch=521 metrics={'time_sample_batch': 6.278101248639746e-05, 'time_algorithm_update': 0.0018379338508271621, 'loss': 2.6119588368388945e-11, 'time_step': 0.002199927957531536, 'td_error': 8.370597759560781e-13, 'value_scale': -7.219248513430896e-05, 'environment': -0.1130863280675352} step=12519109
2023-01-10 08:32.56 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12519109.pt


Epoch 522/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:33.51 [info     ] DQN_20230110005015: epoch=522 step=12543138 epoch=522 metrics={'time_sample_batch': 6.180406088060871e-05, 'time_algorithm_update': 0.0018609581956494697, 'loss': 2.5088719147480062e-11, 'time_step': 0.0022229421817932422, 'td_error': 1.0342858089963138e-12, 'value_scale': -8.02435843858178e-05, 'environment': -0.16475555334381142} step=12543138
2023-01-10 08:33.51 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12543138.pt


Epoch 523/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:34.46 [info     ] DQN_20230110005015: epoch=523 step=12567167 epoch=523 metrics={'time_sample_batch': 6.125465334962482e-05, 'time_algorithm_update': 0.0018466154363867872, 'loss': 2.5684932983612298e-11, 'time_step': 0.0022092599577857907, 'td_error': 7.271463752974814e-13, 'value_scale': -6.728634499417634e-05, 'environment': -0.043786454339441505} step=12567167
2023-01-10 08:34.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12567167.pt


Epoch 524/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:35.42 [info     ] DQN_20230110005015: epoch=524 step=12591196 epoch=524 metrics={'time_sample_batch': 6.153181780043938e-05, 'time_algorithm_update': 0.0018689896641780118, 'loss': 2.546931885619605e-11, 'time_step': 0.0022306675628963107, 'td_error': 4.243758025092857e-13, 'value_scale': 5.1400196060761026e-05, 'environment': -0.2550339347278073} step=12591196
2023-01-10 08:35.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12591196.pt


Epoch 525/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:36.36 [info     ] DQN_20230110005015: epoch=525 step=12615225 epoch=525 metrics={'time_sample_batch': 6.057025540003522e-05, 'time_algorithm_update': 0.001807873364845075, 'loss': 2.4726687521201323e-11, 'time_step': 0.002165825994349413, 'td_error': 7.28735126674347e-15, 'value_scale': 6.5728798058732525e-06, 'environment': -0.09241087572822253} step=12615225
2023-01-10 08:36.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12615225.pt


Epoch 526/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:37.31 [info     ] DQN_20230110005015: epoch=526 step=12639254 epoch=526 metrics={'time_sample_batch': 6.149317114990874e-05, 'time_algorithm_update': 0.0018311283790721415, 'loss': 2.568636794984227e-11, 'time_step': 0.0021893332883547575, 'td_error': 7.862074329998863e-14, 'value_scale': -2.2104289546380637e-05, 'environment': -0.20595577209565716} step=12639254
2023-01-10 08:37.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12639254.pt


Epoch 527/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:38.26 [info     ] DQN_20230110005015: epoch=527 step=12663283 epoch=527 metrics={'time_sample_batch': 6.113256168472637e-05, 'time_algorithm_update': 0.001846200433869433, 'loss': 2.521254723449609e-11, 'time_step': 0.0022075892219065607, 'td_error': 9.86928458898409e-15, 'value_scale': 7.811261419297076e-06, 'environment': -0.1882833318735942} step=12663283
2023-01-10 08:38.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12663283.pt


Epoch 528/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:39.20 [info     ] DQN_20230110005015: epoch=528 step=12687312 epoch=528 metrics={'time_sample_batch': 6.11302200648226e-05, 'time_algorithm_update': 0.0018211386812884709, 'loss': 2.5093939077379777e-11, 'time_step': 0.0021790077070245493, 'td_error': 1.834262125053334e-13, 'value_scale': -3.378682862081836e-05, 'environment': 0.1024608131366779} step=12687312
2023-01-10 08:39.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12687312.pt


Epoch 529/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:40.14 [info     ] DQN_20230110005015: epoch=529 step=12711341 epoch=529 metrics={'time_sample_batch': 6.146908024683086e-05, 'time_algorithm_update': 0.0018201460626579868, 'loss': 2.404344817536462e-11, 'time_step': 0.0021738318935792603, 'td_error': 2.1853542077043075e-13, 'value_scale': -3.688106031053459e-05, 'environment': -0.01221030861499144} step=12711341
2023-01-10 08:40.14 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12711341.pt


Epoch 530/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:41.08 [info     ] DQN_20230110005015: epoch=530 step=12735370 epoch=530 metrics={'time_sample_batch': 6.0748645163806786e-05, 'time_algorithm_update': 0.001797744757406095, 'loss': 2.4963976047078123e-11, 'time_step': 0.0021522509887516236, 'td_error': 1.5219828371061084e-13, 'value_scale': -3.07749192696038e-05, 'environment': 0.14898731447532085} step=12735370
2023-01-10 08:41.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12735370.pt


Epoch 531/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:42.02 [info     ] DQN_20230110005015: epoch=531 step=12759399 epoch=531 metrics={'time_sample_batch': 6.0378073891915106e-05, 'time_algorithm_update': 0.0018017002396073759, 'loss': 2.5727935421927025e-11, 'time_step': 0.0021588513405563442, 'td_error': 3.654635460268869e-13, 'value_scale': -4.769461726910794e-05, 'environment': -0.39398057356264327} step=12759399
2023-01-10 08:42.02 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12759399.pt


Epoch 532/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:42.57 [info     ] DQN_20230110005015: epoch=532 step=12783428 epoch=532 metrics={'time_sample_batch': 6.158539723891574e-05, 'time_algorithm_update': 0.0018524827420962051, 'loss': 2.485027050743472e-11, 'time_step': 0.0022137572174091317, 'td_error': 3.982154394025088e-13, 'value_scale': -4.9783686634501956e-05, 'environment': -0.31374177594011704} step=12783428
2023-01-10 08:42.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12783428.pt


Epoch 533/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:43.52 [info     ] DQN_20230110005015: epoch=533 step=12807457 epoch=533 metrics={'time_sample_batch': 6.265069538548107e-05, 'time_algorithm_update': 0.001838944974208157, 'loss': 2.4558310885919885e-11, 'time_step': 0.0021993571777578704, 'td_error': 5.707759997407841e-14, 'value_scale': -1.8834714377541683e-05, 'environment': -0.5500399738981808} step=12807457
2023-01-10 08:43.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12807457.pt


Epoch 534/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:44.47 [info     ] DQN_20230110005015: epoch=534 step=12831486 epoch=534 metrics={'time_sample_batch': 6.203342056576046e-05, 'time_algorithm_update': 0.0018347445744403299, 'loss': 2.441291777074148e-11, 'time_step': 0.0021955808790898204, 'td_error': 7.499207244346452e-15, 'value_scale': 6.7323606929448545e-06, 'environment': -0.42593984396267814} step=12831486
2023-01-10 08:44.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12831486.pt


Epoch 535/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:45.42 [info     ] DQN_20230110005015: epoch=535 step=12855515 epoch=535 metrics={'time_sample_batch': 6.247857640043489e-05, 'time_algorithm_update': 0.0018586934622399018, 'loss': 2.6061069254870808e-11, 'time_step': 0.002220733448819, 'td_error': 3.972025916748016e-13, 'value_scale': 4.9726614750972e-05, 'environment': -0.2562792772388155} step=12855515
2023-01-10 08:45.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12855515.pt


Epoch 536/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:46.38 [info     ] DQN_20230110005015: epoch=536 step=12879544 epoch=536 metrics={'time_sample_batch': 6.240789123011903e-05, 'time_algorithm_update': 0.0018597973673485247, 'loss': 2.5607900592619836e-11, 'time_step': 0.0022222386539996926, 'td_error': 8.059290278054192e-13, 'value_scale': -7.083432065813932e-05, 'environment': -0.07812572853930425} step=12879544
2023-01-10 08:46.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12879544.pt


Epoch 537/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:47.32 [info     ] DQN_20230110005015: epoch=537 step=12903573 epoch=537 metrics={'time_sample_batch': 6.000892148293123e-05, 'time_algorithm_update': 0.0018098609536480648, 'loss': 2.5158957407282284e-11, 'time_step': 0.0021655631872036867, 'td_error': 1.1101311629474064e-13, 'value_scale': 2.6272204416443457e-05, 'environment': 0.06942836642024683} step=12903573
2023-01-10 08:47.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12903573.pt


Epoch 538/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:48.26 [info     ] DQN_20230110005015: epoch=538 step=12927602 epoch=538 metrics={'time_sample_batch': 6.165693571139992e-05, 'time_algorithm_update': 0.0018030725677806471, 'loss': 2.4817524970981736e-11, 'time_step': 0.0021584874567921438, 'td_error': 4.327495751066465e-14, 'value_scale': -1.6396491160142875e-05, 'environment': -0.19768982096349902} step=12927602
2023-01-10 08:48.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12927602.pt


Epoch 539/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:49.21 [info     ] DQN_20230110005015: epoch=539 step=12951631 epoch=539 metrics={'time_sample_batch': 6.227678035974374e-05, 'time_algorithm_update': 0.0018700096777769465, 'loss': 2.5312297854315448e-11, 'time_step': 0.0022293512946910746, 'td_error': 1.4170979954157777e-12, 'value_scale': -9.393121096611927e-05, 'environment': -0.23008165193286953} step=12951631
2023-01-10 08:49.21 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12951631.pt


Epoch 540/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:50.15 [info     ] DQN_20230110005015: epoch=540 step=12975660 epoch=540 metrics={'time_sample_batch': 6.0079299067581753e-05, 'time_algorithm_update': 0.0017923145805376986, 'loss': 2.5410774654144407e-11, 'time_step': 0.002147435973291763, 'td_error': 4.85440823157345e-13, 'value_scale': 5.4971519152254287e-05, 'environment': -0.19208901694459968} step=12975660
2023-01-10 08:50.15 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12975660.pt


Epoch 541/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:51.08 [info     ] DQN_20230110005015: epoch=541 step=12999689 epoch=541 metrics={'time_sample_batch': 6.020773096603323e-05, 'time_algorithm_update': 0.0017848144712070682, 'loss': 2.510746995330295e-11, 'time_step': 0.002142651180604051, 'td_error': 7.152934356672205e-15, 'value_scale': -6.540659242928936e-06, 'environment': -0.14295981652098538} step=12999689
2023-01-10 08:51.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_12999689.pt


Epoch 542/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:52.04 [info     ] DQN_20230110005015: epoch=542 step=13023718 epoch=542 metrics={'time_sample_batch': 6.257837306565622e-05, 'time_algorithm_update': 0.001876575907417052, 'loss': 2.5653868647429764e-11, 'time_step': 0.0022402545119786493, 'td_error': 4.901203846003212e-13, 'value_scale': 5.523695540069592e-05, 'environment': -0.04528113430966578} step=13023718
2023-01-10 08:52.04 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13023718.pt


Epoch 543/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:52.59 [info     ] DQN_20230110005015: epoch=543 step=13047747 epoch=543 metrics={'time_sample_batch': 6.178218260989753e-05, 'time_algorithm_update': 0.0018466514635981034, 'loss': 2.4717837010354115e-11, 'time_step': 0.0022067769675413774, 'td_error': 2.6428345601707933e-14, 'value_scale': 1.2795393647879949e-05, 'environment': -0.381063060424026} step=13047747
2023-01-10 08:52.59 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13047747.pt


Epoch 544/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:53.53 [info     ] DQN_20230110005015: epoch=544 step=13071776 epoch=544 metrics={'time_sample_batch': 6.131197342667628e-05, 'time_algorithm_update': 0.0018166596684405666, 'loss': 2.4716149168923448e-11, 'time_step': 0.0021738537420836167, 'td_error': 1.845265054453099e-13, 'value_scale': -3.389075738539716e-05, 'environment': 0.1644982735322555} step=13071776
2023-01-10 08:53.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13071776.pt


Epoch 545/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:54.49 [info     ] DQN_20230110005015: epoch=545 step=13095805 epoch=545 metrics={'time_sample_batch': 6.239643118355604e-05, 'time_algorithm_update': 0.0018563953805909412, 'loss': 2.6029885378219874e-11, 'time_step': 0.0022195148043350917, 'td_error': 2.893499603656326e-13, 'value_scale': -4.2440933440709554e-05, 'environment': -0.13866977663441343} step=13095805
2023-01-10 08:54.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13095805.pt


Epoch 546/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:55.44 [info     ] DQN_20230110005015: epoch=546 step=13119834 epoch=546 metrics={'time_sample_batch': 6.287389343520805e-05, 'time_algorithm_update': 0.0018599252336862367, 'loss': 2.5483365005255756e-11, 'time_step': 0.002223337101943286, 'td_error': 8.649291857570281e-13, 'value_scale': -7.338480010183815e-05, 'environment': -0.2882671124040724} step=13119834
2023-01-10 08:55.44 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13119834.pt


Epoch 547/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:56.40 [info     ] DQN_20230110005015: epoch=547 step=13143863 epoch=547 metrics={'time_sample_batch': 6.230182378617232e-05, 'time_algorithm_update': 0.0018633551809730104, 'loss': 2.5867007078758465e-11, 'time_step': 0.0022261241257346977, 'td_error': 1.21129801836699e-12, 'value_scale': -8.684318430898761e-05, 'environment': -0.16348357720750809} step=13143863
2023-01-10 08:56.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13143863.pt


Epoch 548/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:57.34 [info     ] DQN_20230110005015: epoch=548 step=13167892 epoch=548 metrics={'time_sample_batch': 6.136784487446524e-05, 'time_algorithm_update': 0.0018206765883979669, 'loss': 2.6454255119830922e-11, 'time_step': 0.0021795098158179003, 'td_error': 1.0043530612598145e-13, 'value_scale': 2.4997257956548755e-05, 'environment': -0.3087854619444883} step=13167892
2023-01-10 08:57.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13167892.pt


Epoch 549/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:58.31 [info     ] DQN_20230110005015: epoch=549 step=13191921 epoch=549 metrics={'time_sample_batch': 6.446141250878964e-05, 'time_algorithm_update': 0.0018882527423413973, 'loss': 2.5341520285669637e-11, 'time_step': 0.0022721009693211637, 'td_error': 6.13072604803638e-13, 'value_scale': 6.178111488671059e-05, 'environment': -0.6264572943484501} step=13191921
2023-01-10 08:58.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13191921.pt


Epoch 550/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 08:59.28 [info     ] DQN_20230110005015: epoch=550 step=13215950 epoch=550 metrics={'time_sample_batch': 6.707797430890087e-05, 'time_algorithm_update': 0.0018933270326728922, 'loss': 2.5797212609969876e-11, 'time_step': 0.0022793969707575533, 'td_error': 1.2530239485928977e-13, 'value_scale': -2.7922400194799562e-05, 'environment': 0.21461652522217234} step=13215950
2023-01-10 08:59.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13215950.pt


Epoch 551/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:00.21 [info     ] DQN_20230110005015: epoch=551 step=13239979 epoch=551 metrics={'time_sample_batch': 6.061390279815698e-05, 'time_algorithm_update': 0.0017917485633807868, 'loss': 2.491991342084404e-11, 'time_step': 0.0021500855757456017, 'td_error': 2.3178973064874715e-15, 'value_scale': -3.6673537811100814e-06, 'environment': -0.3493528156729984} step=13239979
2023-01-10 09:00.21 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13239979.pt


Epoch 552/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:01.15 [info     ] DQN_20230110005015: epoch=552 step=13264008 epoch=552 metrics={'time_sample_batch': 6.183803421345001e-05, 'time_algorithm_update': 0.0018084856289950861, 'loss': 2.472085896001168e-11, 'time_step': 0.0021722663321533356, 'td_error': 1.1937058114968254e-14, 'value_scale': 8.579326211173529e-06, 'environment': -0.12963687257119696} step=13264008
2023-01-10 09:01.15 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13264008.pt


Epoch 553/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:02.11 [info     ] DQN_20230110005015: epoch=553 step=13288037 epoch=553 metrics={'time_sample_batch': 6.235188087267477e-05, 'time_algorithm_update': 0.0018550606770900222, 'loss': 2.41932904498973e-11, 'time_step': 0.002220493373246153, 'td_error': 6.617864334085925e-14, 'value_scale': 2.0276080361189935e-05, 'environment': -0.3756242616173565} step=13288037
2023-01-10 09:02.11 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13288037.pt


Epoch 554/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:03.07 [info     ] DQN_20230110005015: epoch=554 step=13312066 epoch=554 metrics={'time_sample_batch': 6.268448019807588e-05, 'time_algorithm_update': 0.0018696696864734593, 'loss': 2.55589778923131e-11, 'time_step': 0.0022384423363039597, 'td_error': 1.7153959061529957e-13, 'value_scale': -3.26618738415784e-05, 'environment': -0.2751901034165181} step=13312066
2023-01-10 09:03.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13312066.pt


Epoch 555/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:04.01 [info     ] DQN_20230110005015: epoch=555 step=13336095 epoch=555 metrics={'time_sample_batch': 6.0894668977891336e-05, 'time_algorithm_update': 0.0018053647259250207, 'loss': 2.5954395084321327e-11, 'time_step': 0.0021659224770271433, 'td_error': 7.556398008757576e-14, 'value_scale': -2.1660369433894486e-05, 'environment': -0.4166860137524207} step=13336095
2023-01-10 09:04.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13336095.pt


Epoch 556/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:04.54 [info     ] DQN_20230110005015: epoch=556 step=13360124 epoch=556 metrics={'time_sample_batch': 6.081155139342533e-05, 'time_algorithm_update': 0.0018007759546051858, 'loss': 2.597077335341831e-11, 'time_step': 0.0021609585008062005, 'td_error': 1.3576038814080298e-15, 'value_scale': 2.648872532922659e-06, 'environment': -0.10462791355019466} step=13360124
2023-01-10 09:04.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13360124.pt


Epoch 557/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:05.48 [info     ] DQN_20230110005015: epoch=557 step=13384153 epoch=557 metrics={'time_sample_batch': 6.144189364286322e-05, 'time_algorithm_update': 0.001808715455019795, 'loss': 2.4919498444200497e-11, 'time_step': 0.00216666652675157, 'td_error': 6.757445730827747e-14, 'value_scale': -2.0490103202112372e-05, 'environment': -0.011217608612419216} step=13384153
2023-01-10 09:05.48 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13384153.pt


Epoch 558/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:06.42 [info     ] DQN_20230110005015: epoch=558 step=13408182 epoch=558 metrics={'time_sample_batch': 5.9897267886417434e-05, 'time_algorithm_update': 0.0017874932843769944, 'loss': 2.5806708256517227e-11, 'time_step': 0.002144905535477944, 'td_error': 1.9709722200246093e-12, 'value_scale': -0.00011078268470535993, 'environment': -0.22868953419990268} step=13408182
2023-01-10 09:06.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13408182.pt


Epoch 559/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:07.36 [info     ] DQN_20230110005015: epoch=559 step=13432211 epoch=559 metrics={'time_sample_batch': 6.111168554795707e-05, 'time_algorithm_update': 0.001803639051277116, 'loss': 2.61276713308013e-11, 'time_step': 0.002164837949815436, 'td_error': 3.581061941061275e-13, 'value_scale': 4.719828269466445e-05, 'environment': -0.21388285192216006} step=13432211
2023-01-10 09:07.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13432211.pt


Epoch 560/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:08.31 [info     ] DQN_20230110005015: epoch=560 step=13456240 epoch=560 metrics={'time_sample_batch': 6.231717330308397e-05, 'time_algorithm_update': 0.0018609050726284323, 'loss': 2.481779404563335e-11, 'time_step': 0.0022241806407472683, 'td_error': 1.3327013243254368e-14, 'value_scale': -9.024057544949083e-06, 'environment': -0.1669367299276002} step=13456240
2023-01-10 09:08.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13456240.pt


Epoch 561/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:09.26 [info     ] DQN_20230110005015: epoch=561 step=13480269 epoch=561 metrics={'time_sample_batch': 6.165939639672253e-05, 'time_algorithm_update': 0.0018134253257927047, 'loss': 2.5243180838477036e-11, 'time_step': 0.0021755181972620027, 'td_error': 8.592357624277991e-14, 'value_scale': -2.311166264505625e-05, 'environment': -0.35424317147942685} step=13480269
2023-01-10 09:09.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13480269.pt


Epoch 562/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:10.20 [info     ] DQN_20230110005015: epoch=562 step=13504298 epoch=562 metrics={'time_sample_batch': 6.102487693550713e-05, 'time_algorithm_update': 0.0018296921623795462, 'loss': 2.4683288002996162e-11, 'time_step': 0.002191802228879748, 'td_error': 4.129012161019692e-14, 'value_scale': -1.6008775245836053e-05, 'environment': -0.22794243744915982} step=13504298
2023-01-10 09:10.20 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13504298.pt


Epoch 563/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:11.15 [info     ] DQN_20230110005015: epoch=563 step=13528327 epoch=563 metrics={'time_sample_batch': 6.121278201066737e-05, 'time_algorithm_update': 0.0018192270959111712, 'loss': 2.5105142288726722e-11, 'time_step': 0.0021827763656615574, 'td_error': 4.036590866320483e-13, 'value_scale': 5.012845911632892e-05, 'environment': -0.15088562791946575} step=13528327
2023-01-10 09:11.15 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13528327.pt


Epoch 564/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:12.11 [info     ] DQN_20230110005015: epoch=564 step=13552356 epoch=564 metrics={'time_sample_batch': 6.21498863896189e-05, 'time_algorithm_update': 0.001866552821705516, 'loss': 2.553653315020524e-11, 'time_step': 0.0022302336388994796, 'td_error': 7.177470509653842e-14, 'value_scale': 2.11107054968865e-05, 'environment': -0.20295233182887262} step=13552356
2023-01-10 09:12.11 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13552356.pt


Epoch 565/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:13.06 [info     ] DQN_20230110005015: epoch=565 step=13576385 epoch=565 metrics={'time_sample_batch': 6.162599854673893e-05, 'time_algorithm_update': 0.0018439414255220874, 'loss': 2.4775654553323316e-11, 'time_step': 0.002210025865936692, 'td_error': 4.374128923984601e-14, 'value_scale': -1.6471459392317856e-05, 'environment': -0.1977637298853372} step=13576385
2023-01-10 09:13.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13576385.pt


Epoch 566/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:14.00 [info     ] DQN_20230110005015: epoch=566 step=13600414 epoch=566 metrics={'time_sample_batch': 6.089362715547652e-05, 'time_algorithm_update': 0.0017997887534512587, 'loss': 2.4864382765036186e-11, 'time_step': 0.002156400934548219, 'td_error': 9.169987185637783e-14, 'value_scale': -2.387310731814526e-05, 'environment': 0.029234335878357776} step=13600414
2023-01-10 09:14.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13600414.pt


Epoch 567/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:14.54 [info     ] DQN_20230110005015: epoch=567 step=13624443 epoch=567 metrics={'time_sample_batch': 6.0260298348449484e-05, 'time_algorithm_update': 0.0018073662949926056, 'loss': 2.6302298326530342e-11, 'time_step': 0.0021673123971160477, 'td_error': 1.0572670347150942e-14, 'value_scale': 8.062635263960593e-06, 'environment': -0.2743566586944633} step=13624443
2023-01-10 09:14.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13624443.pt


Epoch 568/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:15.49 [info     ] DQN_20230110005015: epoch=568 step=13648472 epoch=568 metrics={'time_sample_batch': 6.148451906280664e-05, 'time_algorithm_update': 0.0018400952453752986, 'loss': 2.5097203599604253e-11, 'time_step': 0.00219814473459786, 'td_error': 2.2658073898139002e-14, 'value_scale': 1.1853465623662379e-05, 'environment': -0.2355703284218146} step=13648472
2023-01-10 09:15.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13648472.pt


Epoch 569/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:16.43 [info     ] DQN_20230110005015: epoch=569 step=13672501 epoch=569 metrics={'time_sample_batch': 6.107287022141641e-05, 'time_algorithm_update': 0.001825261638923463, 'loss': 2.5581090316556336e-11, 'time_step': 0.0021831807912008716, 'td_error': 3.718896190216713e-13, 'value_scale': 4.811691580781358e-05, 'environment': -0.2128854021868541} step=13672501
2023-01-10 09:16.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13672501.pt


Epoch 570/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:17.36 [info     ] DQN_20230110005015: epoch=570 step=13696530 epoch=570 metrics={'time_sample_batch': 6.003715983143192e-05, 'time_algorithm_update': 0.0017847250729217587, 'loss': 2.5582331062267664e-11, 'time_step': 0.002141407611759133, 'td_error': 7.368013993750165e-13, 'value_scale': 6.772719929905447e-05, 'environment': 0.07993958194200475} step=13696530
2023-01-10 09:17.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13696530.pt


Epoch 571/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:18.31 [info     ] DQN_20230110005015: epoch=571 step=13720559 epoch=571 metrics={'time_sample_batch': 6.137486973417659e-05, 'time_algorithm_update': 0.0018279294385421469, 'loss': 2.524521307793958e-11, 'time_step': 0.0021911417134687534, 'td_error': 9.497158194255003e-13, 'value_scale': -7.689082240046445e-05, 'environment': -0.4949389205166586} step=13720559
2023-01-10 09:18.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13720559.pt


Epoch 572/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:19.26 [info     ] DQN_20230110005015: epoch=572 step=13744588 epoch=572 metrics={'time_sample_batch': 6.212615268279751e-05, 'time_algorithm_update': 0.0018484844161883796, 'loss': 2.520439223689391e-11, 'time_step': 0.0022114492731746442, 'td_error': 9.71409031506988e-14, 'value_scale': -2.4562276475508577e-05, 'environment': -0.1188141068613855} step=13744588
2023-01-10 09:19.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13744588.pt


Epoch 573/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:20.21 [info     ] DQN_20230110005015: epoch=573 step=13768617 epoch=573 metrics={'time_sample_batch': 6.220464656016538e-05, 'time_algorithm_update': 0.0018420409529172127, 'loss': 2.4830908367695754e-11, 'time_step': 0.002204218747796496, 'td_error': 1.7398134369313712e-15, 'value_scale': 2.897580908251595e-06, 'environment': 0.2920169461570955} step=13768617
2023-01-10 09:20.21 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13768617.pt


Epoch 574/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:21.17 [info     ] DQN_20230110005015: epoch=574 step=13792646 epoch=574 metrics={'time_sample_batch': 6.164157627237002e-05, 'time_algorithm_update': 0.0018488422276162193, 'loss': 2.5850722570774653e-11, 'time_step': 0.0022144479754365107, 'td_error': 1.558763292618955e-13, 'value_scale': -3.1129930742696706e-05, 'environment': -0.35793331412821916} step=13792646
2023-01-10 09:21.17 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13792646.pt


Epoch 575/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:22.10 [info     ] DQN_20230110005015: epoch=575 step=13816675 epoch=575 metrics={'time_sample_batch': 6.062481712821698e-05, 'time_algorithm_update': 0.0017832926266340933, 'loss': 2.482258244037994e-11, 'time_step': 0.0021378775106108815, 'td_error': 5.3396074088923113e-14, 'value_scale': -1.817333677700452e-05, 'environment': -0.2102834141358254} step=13816675
2023-01-10 09:22.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13816675.pt


Epoch 576/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:23.05 [info     ] DQN_20230110005015: epoch=576 step=13840704 epoch=576 metrics={'time_sample_batch': 6.184812500769638e-05, 'time_algorithm_update': 0.0018431092971540766, 'loss': 2.4977085403165625e-11, 'time_step': 0.0022065253525450214, 'td_error': 2.43529657463127e-14, 'value_scale': 1.2250278214517965e-05, 'environment': -0.13633939058288674} step=13840704
2023-01-10 09:23.05 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13840704.pt


Epoch 577/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:24.00 [info     ] DQN_20230110005015: epoch=577 step=13864733 epoch=577 metrics={'time_sample_batch': 6.065308524307238e-05, 'time_algorithm_update': 0.0018265264014129341, 'loss': 2.5279588141724696e-11, 'time_step': 0.0021898203651389806, 'td_error': 5.898766705712183e-16, 'value_scale': 1.5619289382489745e-06, 'environment': -0.1568556830488475} step=13864733
2023-01-10 09:24.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13864733.pt


Epoch 578/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:24.55 [info     ] DQN_20230110005015: epoch=578 step=13888762 epoch=578 metrics={'time_sample_batch': 6.149410382902297e-05, 'time_algorithm_update': 0.0018235050271108975, 'loss': 2.5397578715627302e-11, 'time_step': 0.002187537742150232, 'td_error': 4.340124455266254e-13, 'value_scale': -5.197435140546309e-05, 'environment': 0.018936240230402943} step=13888762
2023-01-10 09:24.55 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13888762.pt


Epoch 579/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:25.49 [info     ] DQN_20230110005015: epoch=579 step=13912791 epoch=579 metrics={'time_sample_batch': 6.167426965195884e-05, 'time_algorithm_update': 0.0018053344039316902, 'loss': 2.4584689932360194e-11, 'time_step': 0.002168814764570925, 'td_error': 1.8190663211999463e-13, 'value_scale': -3.3633230296300525e-05, 'environment': -0.4515632371508215} step=13912791
2023-01-10 09:25.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13912791.pt


Epoch 580/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:26.43 [info     ] DQN_20230110005015: epoch=580 step=13936820 epoch=580 metrics={'time_sample_batch': 6.159617265932043e-05, 'time_algorithm_update': 0.0018027808376602616, 'loss': 2.4878844679565922e-11, 'time_step': 0.0021624145220467956, 'td_error': 5.535989442623303e-13, 'value_scale': 5.870070091049277e-05, 'environment': -0.23107369348148699} step=13936820
2023-01-10 09:26.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13936820.pt


Epoch 581/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:27.36 [info     ] DQN_20230110005015: epoch=581 step=13960849 epoch=581 metrics={'time_sample_batch': 6.013073532851906e-05, 'time_algorithm_update': 0.0017788249944558061, 'loss': 2.5517864646020983e-11, 'time_step': 0.0021373948987798647, 'td_error': 8.516660823161167e-14, 'value_scale': 2.2998440649774396e-05, 'environment': 0.06940148382339381} step=13960849
2023-01-10 09:27.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13960849.pt


Epoch 582/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:28.32 [info     ] DQN_20230110005015: epoch=582 step=13984878 epoch=582 metrics={'time_sample_batch': 6.265153876553116e-05, 'time_algorithm_update': 0.0018540718586308228, 'loss': 2.4729121412829314e-11, 'time_step': 0.002218460103010803, 'td_error': 1.5669630663259584e-13, 'value_scale': 3.121491754620278e-05, 'environment': -0.06060546569957339} step=13984878
2023-01-10 09:28.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_13984878.pt


Epoch 583/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:29.26 [info     ] DQN_20230110005015: epoch=583 step=14008907 epoch=583 metrics={'time_sample_batch': 6.180418986814578e-05, 'time_algorithm_update': 0.0018315214239418385, 'loss': 2.6360357365841334e-11, 'time_step': 0.002194380124185092, 'td_error': 1.1171041532232539e-13, 'value_scale': 2.634439251051401e-05, 'environment': -0.2872130808386118} step=14008907
2023-01-10 09:29.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14008907.pt


Epoch 584/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:30.22 [info     ] DQN_20230110005015: epoch=584 step=14032936 epoch=584 metrics={'time_sample_batch': 6.17032918478002e-05, 'time_algorithm_update': 0.0018432537929619528, 'loss': 2.5974540107760204e-11, 'time_step': 0.00220816807828447, 'td_error': 5.551915025638878e-16, 'value_scale': 1.1755708725681306e-06, 'environment': -0.178255459233697} step=14032936
2023-01-10 09:30.22 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14032936.pt


Epoch 585/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:31.15 [info     ] DQN_20230110005015: epoch=585 step=14056965 epoch=585 metrics={'time_sample_batch': 5.9946868555481015e-05, 'time_algorithm_update': 0.0017924372377633364, 'loss': 2.543240986376634e-11, 'time_step': 0.0021525766425942593, 'td_error': 6.295596403870337e-15, 'value_scale': 6.147255739201702e-06, 'environment': -0.1249220861504872} step=14056965
2023-01-10 09:31.15 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14056965.pt


Epoch 586/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:32.10 [info     ] DQN_20230110005015: epoch=586 step=14080994 epoch=586 metrics={'time_sample_batch': 6.196334064465703e-05, 'time_algorithm_update': 0.001849697980547751, 'loss': 2.579068461337412e-11, 'time_step': 0.002217514505298641, 'td_error': 3.0102295821712395e-14, 'value_scale': -1.3633141608280787e-05, 'environment': -0.21496544678620594} step=14080994
2023-01-10 09:32.10 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14080994.pt


Epoch 587/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:33.05 [info     ] DQN_20230110005015: epoch=587 step=14105023 epoch=587 metrics={'time_sample_batch': 6.192521986639293e-05, 'time_algorithm_update': 0.0018234882388868416, 'loss': 2.4439950538060993e-11, 'time_step': 0.0021866470038297992, 'td_error': 5.73371065956729e-14, 'value_scale': 1.88174852239485e-05, 'environment': 0.26415217569245486} step=14105023
2023-01-10 09:33.05 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14105023.pt


Epoch 588/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:34.01 [info     ] DQN_20230110005015: epoch=588 step=14129052 epoch=588 metrics={'time_sample_batch': 6.157482026087577e-05, 'time_algorithm_update': 0.001872088470690765, 'loss': 2.5728440396069715e-11, 'time_step': 0.0022365247579672455, 'td_error': 2.0474546101378804e-13, 'value_scale': -3.568697104249208e-05, 'environment': -0.25982209113457594} step=14129052
2023-01-10 09:34.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14129052.pt


Epoch 589/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:34.56 [info     ] DQN_20230110005015: epoch=589 step=14153081 epoch=589 metrics={'time_sample_batch': 6.180112393361074e-05, 'time_algorithm_update': 0.0018502704470815161, 'loss': 2.502438702732376e-11, 'time_step': 0.0022178697965084485, 'td_error': 1.1260529690860285e-13, 'value_scale': 2.6458964013536796e-05, 'environment': -0.16374935980116725} step=14153081
2023-01-10 09:34.56 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14153081.pt


Epoch 590/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:35.51 [info     ] DQN_20230110005015: epoch=590 step=14177110 epoch=590 metrics={'time_sample_batch': 6.209562232498422e-05, 'time_algorithm_update': 0.0018357387706876133, 'loss': 2.577253945132779e-11, 'time_step': 0.002201146988978055, 'td_error': 3.459122406279917e-13, 'value_scale': 4.6396448753186237e-05, 'environment': -0.5214299618177884} step=14177110
2023-01-10 09:35.51 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14177110.pt


Epoch 591/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:36.46 [info     ] DQN_20230110005015: epoch=591 step=14201139 epoch=591 metrics={'time_sample_batch': 6.200683921100524e-05, 'time_algorithm_update': 0.0018394620157894538, 'loss': 2.638815047885085e-11, 'time_step': 0.002205076197410246, 'td_error': 1.8007846664615275e-13, 'value_scale': 3.3474854404758585e-05, 'environment': -0.3171660804108477} step=14201139
2023-01-10 09:36.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14201139.pt


Epoch 592/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:37.40 [info     ] DQN_20230110005015: epoch=592 step=14225168 epoch=592 metrics={'time_sample_batch': 6.114048945719723e-05, 'time_algorithm_update': 0.0017928479737698488, 'loss': 2.492514929870179e-11, 'time_step': 0.0021537862083399815, 'td_error': 2.5245662986586566e-13, 'value_scale': -3.963905589070964e-05, 'environment': -0.20607580080699495} step=14225168
2023-01-10 09:37.40 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14225168.pt


Epoch 593/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:38.34 [info     ] DQN_20230110005015: epoch=593 step=14249197 epoch=593 metrics={'time_sample_batch': 6.0570572907818785e-05, 'time_algorithm_update': 0.0017916700595813008, 'loss': 2.414414881976268e-11, 'time_step': 0.0021507681182590813, 'td_error': 1.4582979934163094e-14, 'value_scale': 9.469379967778023e-06, 'environment': -0.26113665211951226} step=14249197
2023-01-10 09:38.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14249197.pt


Epoch 594/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:39.29 [info     ] DQN_20230110005015: epoch=594 step=14273226 epoch=594 metrics={'time_sample_batch': 6.272933809462248e-05, 'time_algorithm_update': 0.001857320697493428, 'loss': 2.5757816591655883e-11, 'time_step': 0.0022237243919844057, 'td_error': 2.0981529738096196e-12, 'value_scale': -0.00011429970437131256, 'environment': 0.2734229358960062} step=14273226
2023-01-10 09:39.29 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14273226.pt


Epoch 595/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:40.24 [info     ] DQN_20230110005015: epoch=595 step=14297255 epoch=595 metrics={'time_sample_batch': 6.204004854074236e-05, 'time_algorithm_update': 0.0018408196393834989, 'loss': 2.5848126259426478e-11, 'time_step': 0.0022060867552305013, 'td_error': 1.856020493837112e-14, 'value_scale': -1.0708656591355745e-05, 'environment': -0.3983012997114609} step=14297255
2023-01-10 09:40.24 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14297255.pt


Epoch 596/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:41.18 [info     ] DQN_20230110005015: epoch=596 step=14321284 epoch=596 metrics={'time_sample_batch': 6.098411687379215e-05, 'time_algorithm_update': 0.0017980126645205951, 'loss': 2.498104722812972e-11, 'time_step': 0.0021588917136554477, 'td_error': 1.1298625817951953e-13, 'value_scale': -2.6512776493508316e-05, 'environment': -0.24974508261454398} step=14321284
2023-01-10 09:41.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14321284.pt


Epoch 597/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:42.11 [info     ] DQN_20230110005015: epoch=597 step=14345313 epoch=597 metrics={'time_sample_batch': 6.010748780549126e-05, 'time_algorithm_update': 0.0017844861284703906, 'loss': 2.4539569738552207e-11, 'time_step': 0.0021396651984977636, 'td_error': 1.084879835386536e-13, 'value_scale': -2.5975985751571137e-05, 'environment': -0.0728539924862724} step=14345313
2023-01-10 09:42.11 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14345313.pt


Epoch 598/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:43.07 [info     ] DQN_20230110005015: epoch=598 step=14369342 epoch=598 metrics={'time_sample_batch': 6.188129464896054e-05, 'time_algorithm_update': 0.0018633894122809258, 'loss': 2.5342715161828125e-11, 'time_step': 0.002230616920404832, 'td_error': 6.269511259443664e-13, 'value_scale': 6.247738603592478e-05, 'environment': -0.055809345793539175} step=14369342
2023-01-10 09:43.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14369342.pt


Epoch 599/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:44.02 [info     ] DQN_20230110005015: epoch=599 step=14393371 epoch=599 metrics={'time_sample_batch': 6.080955704765982e-05, 'time_algorithm_update': 0.0018249957956095561, 'loss': 2.5383407483045702e-11, 'time_step': 0.0021889201015850403, 'td_error': 3.062315347287244e-14, 'value_scale': 1.378846673420638e-05, 'environment': -0.3334503197992343} step=14393371
2023-01-10 09:44.02 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14393371.pt


Epoch 600/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:44.57 [info     ] DQN_20230110005015: epoch=600 step=14417400 epoch=600 metrics={'time_sample_batch': 6.173849552330282e-05, 'time_algorithm_update': 0.0018534428955558197, 'loss': 2.521562735791276e-11, 'time_step': 0.0022170761461149584, 'td_error': 4.3496809433458417e-13, 'value_scale': 5.203799370307706e-05, 'environment': -0.1025867092119446} step=14417400
2023-01-10 09:44.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14417400.pt


Epoch 601/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:45.52 [info     ] DQN_20230110005015: epoch=601 step=14441429 epoch=601 metrics={'time_sample_batch': 6.245855356583391e-05, 'time_algorithm_update': 0.0018433006154379102, 'loss': 2.6870564196607286e-11, 'time_step': 0.002212000159101245, 'td_error': 1.1042071129485798e-13, 'value_scale': 2.621064669150315e-05, 'environment': -0.20918558851137675} step=14441429
2023-01-10 09:45.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14441429.pt


Epoch 602/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:46.47 [info     ] DQN_20230110005015: epoch=602 step=14465458 epoch=602 metrics={'time_sample_batch': 6.055203839095326e-05, 'time_algorithm_update': 0.001828400947522857, 'loss': 2.586206523777732e-11, 'time_step': 0.002189661412804834, 'td_error': 4.7411651728990234e-14, 'value_scale': -1.7153222191370717e-05, 'environment': -0.22293349574722604} step=14465458
2023-01-10 09:46.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14465458.pt


Epoch 603/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:47.41 [info     ] DQN_20230110005015: epoch=603 step=14489487 epoch=603 metrics={'time_sample_batch': 6.088466748270909e-05, 'time_algorithm_update': 0.0018008284128443015, 'loss': 2.5359144892412564e-11, 'time_step': 0.002163040012380416, 'td_error': 3.2813961021876187e-14, 'value_scale': -1.4261324145905236e-05, 'environment': -0.35390297738949955} step=14489487
2023-01-10 09:47.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14489487.pt


Epoch 604/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:48.35 [info     ] DQN_20230110005015: epoch=604 step=14513516 epoch=604 metrics={'time_sample_batch': 6.210249835292202e-05, 'time_algorithm_update': 0.001824602830116805, 'loss': 2.644496132527544e-11, 'time_step': 0.0021903316221253454, 'td_error': 3.775961539946813e-13, 'value_scale': -4.847903526350773e-05, 'environment': -0.1805361537321724} step=14513516
2023-01-10 09:48.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14513516.pt


Epoch 605/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:49.30 [info     ] DQN_20230110005015: epoch=605 step=14537545 epoch=605 metrics={'time_sample_batch': 6.125744146484924e-05, 'time_algorithm_update': 0.0018085077751629898, 'loss': 2.5041934705819497e-11, 'time_step': 0.0021729477633895726, 'td_error': 2.493649366861025e-13, 'value_scale': -3.940108736637443e-05, 'environment': 0.07742874844373208} step=14537545
2023-01-10 09:49.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14537545.pt


Epoch 606/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:50.23 [info     ] DQN_20230110005015: epoch=606 step=14561574 epoch=606 metrics={'time_sample_batch': 6.095106629794683e-05, 'time_algorithm_update': 0.0017939099976173962, 'loss': 2.6313446576794766e-11, 'time_step': 0.002156653809653591, 'td_error': 3.3267071074303566e-13, 'value_scale': -4.550858962816392e-05, 'environment': -0.1388114832433638} step=14561574
2023-01-10 09:50.23 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14561574.pt


Epoch 607/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:51.18 [info     ] DQN_20230110005015: epoch=607 step=14585603 epoch=607 metrics={'time_sample_batch': 6.13053752180491e-05, 'time_algorithm_update': 0.0018360085332382236, 'loss': 2.6355099525195235e-11, 'time_step': 0.0022023076287587535, 'td_error': 3.633888252835301e-14, 'value_scale': 1.5031989294958984e-05, 'environment': -0.31699652504045484} step=14585603
2023-01-10 09:51.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14585603.pt


Epoch 608/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:52.13 [info     ] DQN_20230110005015: epoch=608 step=14609632 epoch=608 metrics={'time_sample_batch': 6.163394616344625e-05, 'time_algorithm_update': 0.0018369072985523099, 'loss': 2.540221128464801e-11, 'time_step': 0.002203567926295018, 'td_error': 2.3661725257166265e-13, 'value_scale': 3.8379418914711046e-05, 'environment': 0.03634766203200043} step=14609632
2023-01-10 09:52.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14609632.pt


Epoch 609/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:53.07 [info     ] DQN_20230110005015: epoch=609 step=14633661 epoch=609 metrics={'time_sample_batch': 6.047232409304232e-05, 'time_algorithm_update': 0.0017893935585395044, 'loss': 2.5204376978713325e-11, 'time_step': 0.0021498669914808547, 'td_error': 6.692419727863057e-13, 'value_scale': 6.45525800101076e-05, 'environment': -0.46931712426361616} step=14633661
2023-01-10 09:53.07 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14633661.pt


Epoch 610/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:54.02 [info     ] DQN_20230110005015: epoch=610 step=14657690 epoch=610 metrics={'time_sample_batch': 6.23338523838393e-05, 'time_algorithm_update': 0.0018304120715903038, 'loss': 2.4567570206630688e-11, 'time_step': 0.0021987836297132175, 'td_error': 5.321876213673749e-13, 'value_scale': 5.756340366531861e-05, 'environment': -0.38417274206746105} step=14657690
2023-01-10 09:54.02 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14657690.pt


Epoch 611/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:54.57 [info     ] DQN_20230110005015: epoch=611 step=14681719 epoch=611 metrics={'time_sample_batch': 6.162888588314572e-05, 'time_algorithm_update': 0.0018598378297466926, 'loss': 2.5595964072127116e-11, 'time_step': 0.002226057310190494, 'td_error': 3.8530440497012243e-13, 'value_scale': 4.897780933451593e-05, 'environment': -0.10977815510928561} step=14681719
2023-01-10 09:54.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14681719.pt


Epoch 612/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:55.53 [info     ] DQN_20230110005015: epoch=612 step=14705748 epoch=612 metrics={'time_sample_batch': 6.200848628263248e-05, 'time_algorithm_update': 0.0018599975063954704, 'loss': 2.5925467627297337e-11, 'time_step': 0.0022264072037679813, 'td_error': 1.6150277204573805e-13, 'value_scale': 3.1706423168761816e-05, 'environment': -0.32878182125204747} step=14705748
2023-01-10 09:55.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14705748.pt


Epoch 613/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:56.46 [info     ] DQN_20230110005015: epoch=613 step=14729777 epoch=613 metrics={'time_sample_batch': 6.0475598392060315e-05, 'time_algorithm_update': 0.0017870363509879734, 'loss': 2.5121679564881507e-11, 'time_step': 0.0021456152943617455, 'td_error': 3.122460177215935e-13, 'value_scale': 4.40883934498152e-05, 'environment': -0.26164671673731144} step=14729777
2023-01-10 09:56.46 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14729777.pt


Epoch 614/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:57.41 [info     ] DQN_20230110005015: epoch=614 step=14753806 epoch=614 metrics={'time_sample_batch': 6.200514252878683e-05, 'time_algorithm_update': 0.0018435611801849152, 'loss': 2.405985251753278e-11, 'time_step': 0.002208111174936385, 'td_error': 2.057448575128067e-13, 'value_scale': 3.5785360792430264e-05, 'environment': -0.10718766249710668} step=14753806
2023-01-10 09:57.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14753806.pt


Epoch 615/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:58.37 [info     ] DQN_20230110005015: epoch=615 step=14777835 epoch=615 metrics={'time_sample_batch': 6.249596987370323e-05, 'time_algorithm_update': 0.001848060989792643, 'loss': 2.6245645760168333e-11, 'time_step': 0.0022160589206312482, 'td_error': 9.750091249260352e-16, 'value_scale': -2.3600296618713646e-06, 'environment': -0.029926769622056055} step=14777835
2023-01-10 09:58.37 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14777835.pt


Epoch 616/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 09:59.31 [info     ] DQN_20230110005015: epoch=616 step=14801864 epoch=616 metrics={'time_sample_batch': 6.167803013477043e-05, 'time_algorithm_update': 0.0018237584380106542, 'loss': 2.4843335916747753e-11, 'time_step': 0.0021878612925038017, 'td_error': 1.3998604119524095e-13, 'value_scale': 2.9514055436370456e-05, 'environment': 0.18478000620011498} step=14801864
2023-01-10 09:59.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14801864.pt


Epoch 617/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:00.25 [info     ] DQN_20230110005015: epoch=617 step=14825893 epoch=617 metrics={'time_sample_batch': 6.169534423109287e-05, 'time_algorithm_update': 0.001811224123706204, 'loss': 2.656856633830471e-11, 'time_step': 0.002173121906486739, 'td_error': 4.71966042503139e-13, 'value_scale': 5.420471418613027e-05, 'environment': -0.15326002133417713} step=14825893
2023-01-10 10:00.25 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14825893.pt


Epoch 618/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:01.21 [info     ] DQN_20230110005015: epoch=618 step=14849922 epoch=618 metrics={'time_sample_batch': 6.234603674503355e-05, 'time_algorithm_update': 0.001851214576320179, 'loss': 2.5335234021355656e-11, 'time_step': 0.002218303611361979, 'td_error': 1.9314055848245784e-14, 'value_scale': 1.0927277969961628e-05, 'environment': -0.4781590830543501} step=14849922
2023-01-10 10:01.21 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14849922.pt


Epoch 619/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:02.15 [info     ] DQN_20230110005015: epoch=619 step=14873951 epoch=619 metrics={'time_sample_batch': 6.0067987852792296e-05, 'time_algorithm_update': 0.0018047623144384181, 'loss': 2.5507316510961665e-11, 'time_step': 0.0021669385515451383, 'td_error': 2.20623984976399e-14, 'value_scale': -1.1678088934780392e-05, 'environment': -0.38718025989835747} step=14873951
2023-01-10 10:02.15 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14873951.pt


Epoch 620/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:03.11 [info     ] DQN_20230110005015: epoch=620 step=14897980 epoch=620 metrics={'time_sample_batch': 6.254801138385295e-05, 'time_algorithm_update': 0.001865962654112817, 'loss': 2.5605134361668585e-11, 'time_step': 0.002231422189598777, 'td_error': 4.534372634574114e-13, 'value_scale': 5.313009104291492e-05, 'environment': -0.555463202198737} step=14897980
2023-01-10 10:03.11 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14897980.pt


Epoch 621/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:04.04 [info     ] DQN_20230110005015: epoch=621 step=14922009 epoch=621 metrics={'time_sample_batch': 6.014281054641271e-05, 'time_algorithm_update': 0.0017894763586161868, 'loss': 2.5949693841558028e-11, 'time_step': 0.0021500374137836823, 'td_error': 1.6991300201594235e-12, 'value_scale': 0.0001028584865262072, 'environment': 0.10490179866859313} step=14922009
2023-01-10 10:04.04 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14922009.pt


Epoch 622/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:05.03 [info     ] DQN_20230110005015: epoch=622 step=14946038 epoch=622 metrics={'time_sample_batch': 6.976082578094926e-05, 'time_algorithm_update': 0.0019288115438100694, 'loss': 2.5280829389158783e-11, 'time_step': 0.002349810404021263, 'td_error': 7.984128297842489e-13, 'value_scale': -7.050626857835051e-05, 'environment': 0.07482305990533251} step=14946038
2023-01-10 10:05.03 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14946038.pt


Epoch 623/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:06.09 [info     ] DQN_20230110005015: epoch=623 step=14970067 epoch=623 metrics={'time_sample_batch': 8.442424806079454e-05, 'time_algorithm_update': 0.002182913608401003, 'loss': 2.721378630274445e-11, 'time_step': 0.002658778587537211, 'td_error': 8.566876497222935e-13, 'value_scale': -7.30348022756262e-05, 'environment': -0.22928759488519676} step=14970067
2023-01-10 10:06.09 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14970067.pt


Epoch 624/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:07.01 [info     ] DQN_20230110005015: epoch=624 step=14994096 epoch=624 metrics={'time_sample_batch': 5.817737783344454e-05, 'time_algorithm_update': 0.001753278486866288, 'loss': 2.3994067429699505e-11, 'time_step': 0.002094675575909461, 'td_error': 9.421990157536563e-14, 'value_scale': -2.421307946209765e-05, 'environment': -0.2819619965077423} step=14994096
2023-01-10 10:07.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_14994096.pt


Epoch 625/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:07.53 [info     ] DQN_20230110005015: epoch=625 step=15018125 epoch=625 metrics={'time_sample_batch': 5.6679782917439115e-05, 'time_algorithm_update': 0.0017438963104601116, 'loss': 2.5195513191554908e-11, 'time_step': 0.002082554617050738, 'td_error': 2.600838298524786e-13, 'value_scale': 4.023851408867488e-05, 'environment': -0.18984947130017174} step=15018125
2023-01-10 10:07.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15018125.pt


Epoch 626/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:08.47 [info     ] DQN_20230110005015: epoch=626 step=15042154 epoch=626 metrics={'time_sample_batch': 5.96991033410008e-05, 'time_algorithm_update': 0.001828328535903968, 'loss': 2.510633275742738e-11, 'time_step': 0.0021725684308872783, 'td_error': 3.164014260116616e-14, 'value_scale': -1.4006152535891249e-05, 'environment': -0.1744942433906691} step=15042154
2023-01-10 10:08.47 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15042154.pt


Epoch 627/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:09.40 [info     ] DQN_20230110005015: epoch=627 step=15066183 epoch=627 metrics={'time_sample_batch': 5.85363997597091e-05, 'time_algorithm_update': 0.0017940629768363645, 'loss': 2.4538418488565816e-11, 'time_step': 0.002136434398046112, 'td_error': 2.21389544422323e-14, 'value_scale': -1.171369475730271e-05, 'environment': -0.03694361020799323} step=15066183
2023-01-10 10:09.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15066183.pt


Epoch 628/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:10.32 [info     ] DQN_20230110005015: epoch=628 step=15090212 epoch=628 metrics={'time_sample_batch': 5.6529829944532966e-05, 'time_algorithm_update': 0.0017518553177591734, 'loss': 2.506944399957409e-11, 'time_step': 0.0020876168023979845, 'td_error': 6.23091544922006e-14, 'value_scale': -1.96816574444184e-05, 'environment': -0.23607062220525318} step=15090212
2023-01-10 10:10.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15090212.pt


Epoch 629/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:11.27 [info     ] DQN_20230110005015: epoch=629 step=15114241 epoch=629 metrics={'time_sample_batch': 5.933898005961201e-05, 'time_algorithm_update': 0.0018180825597283704, 'loss': 2.446900485097156e-11, 'time_step': 0.0021641985189056936, 'td_error': 2.7386843025419886e-14, 'value_scale': -1.3031060583563222e-05, 'environment': -0.16908152974457671} step=15114241
2023-01-10 10:11.27 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15114241.pt


Epoch 630/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:12.19 [info     ] DQN_20230110005015: epoch=630 step=15138270 epoch=630 metrics={'time_sample_batch': 5.782903210640231e-05, 'time_algorithm_update': 0.0017708630105212732, 'loss': 2.4784858378982188e-11, 'time_step': 0.002110537371785658, 'td_error': 7.332349510240153e-15, 'value_scale': -6.543728123901833e-06, 'environment': -0.12094558625205569} step=15138270
2023-01-10 10:12.19 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15138270.pt


Epoch 631/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:13.12 [info     ] DQN_20230110005015: epoch=631 step=15162299 epoch=631 metrics={'time_sample_batch': 5.823749594783866e-05, 'time_algorithm_update': 0.0017873177224169202, 'loss': 2.520346848198226e-11, 'time_step': 0.002132951744467267, 'td_error': 6.635313574080281e-13, 'value_scale': 6.426978229185898e-05, 'environment': -0.5757103038621268} step=15162299
2023-01-10 10:13.12 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15162299.pt


Epoch 632/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:14.05 [info     ] DQN_20230110005015: epoch=632 step=15186328 epoch=632 metrics={'time_sample_batch': 5.7235193329956e-05, 'time_algorithm_update': 0.0017795744219683168, 'loss': 2.546058875095946e-11, 'time_step': 0.002119011743828035, 'td_error': 9.757237279287243e-16, 'value_scale': 2.206933948334813e-06, 'environment': -0.31493096229799217} step=15186328
2023-01-10 10:14.05 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15186328.pt


Epoch 633/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:14.58 [info     ] DQN_20230110005015: epoch=633 step=15210357 epoch=633 metrics={'time_sample_batch': 5.759485027178768e-05, 'time_algorithm_update': 0.00176450189090935, 'loss': 2.5680314170211516e-11, 'time_step': 0.002104993011180597, 'td_error': 5.372241101047173e-14, 'value_scale': -1.8261550800518392e-05, 'environment': -0.008482882194884378} step=15210357
2023-01-10 10:14.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15210357.pt


Epoch 634/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:15.51 [info     ] DQN_20230110005015: epoch=634 step=15234386 epoch=634 metrics={'time_sample_batch': 5.803353688537199e-05, 'time_algorithm_update': 0.0017846925779845346, 'loss': 2.459298051214616e-11, 'time_step': 0.00212742232657227, 'td_error': 2.705833539888848e-13, 'value_scale': 4.103514978501631e-05, 'environment': -0.2980658109088651} step=15234386
2023-01-10 10:15.51 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15234386.pt


Epoch 635/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:16.43 [info     ] DQN_20230110005015: epoch=635 step=15258415 epoch=635 metrics={'time_sample_batch': 5.656427953904962e-05, 'time_algorithm_update': 0.0017356599303457521, 'loss': 2.5064716197650995e-11, 'time_step': 0.002073661720139033, 'td_error': 1.5961735266899768e-14, 'value_scale': 9.927917184283436e-06, 'environment': -0.569303653484836} step=15258415
2023-01-10 10:16.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15258415.pt


Epoch 636/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:17.36 [info     ] DQN_20230110005015: epoch=636 step=15282444 epoch=636 metrics={'time_sample_batch': 5.8408285369041176e-05, 'time_algorithm_update': 0.0018006118130032016, 'loss': 2.5533414586934183e-11, 'time_step': 0.002143136074522262, 'td_error': 2.310881114567632e-13, 'value_scale': -3.7919305009735277e-05, 'environment': -0.8126491268652446} step=15282444
2023-01-10 10:17.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15282444.pt


Epoch 637/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:18.28 [info     ] DQN_20230110005015: epoch=637 step=15306473 epoch=637 metrics={'time_sample_batch': 5.6567355395702887e-05, 'time_algorithm_update': 0.0017503085091367158, 'loss': 2.5002907435292788e-11, 'time_step': 0.002088175774850948, 'td_error': 2.331588629161269e-15, 'value_scale': 3.700842134469088e-06, 'environment': -0.04405659088902873} step=15306473
2023-01-10 10:18.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15306473.pt


Epoch 638/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:19.22 [info     ] DQN_20230110005015: epoch=638 step=15330502 epoch=638 metrics={'time_sample_batch': 5.93067530195803e-05, 'time_algorithm_update': 0.0018005340533625831, 'loss': 2.5189196617323767e-11, 'time_step': 0.0021575429703572244, 'td_error': 1.239523818077876e-15, 'value_scale': -2.641096746233502e-06, 'environment': -0.2415767966226377} step=15330502
2023-01-10 10:19.22 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15330502.pt


Epoch 639/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:20.15 [info     ] DQN_20230110005015: epoch=639 step=15354531 epoch=639 metrics={'time_sample_batch': 5.774257076809065e-05, 'time_algorithm_update': 0.0017695602959295481, 'loss': 2.5422123205186153e-11, 'time_step': 0.0021134682067587875, 'td_error': 1.4004793358839103e-15, 'value_scale': 2.8235905632443763e-06, 'environment': -0.2254026435344601} step=15354531
2023-01-10 10:20.15 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15354531.pt


Epoch 640/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:21.08 [info     ] DQN_20230110005015: epoch=640 step=15378560 epoch=640 metrics={'time_sample_batch': 5.7463501270574706e-05, 'time_algorithm_update': 0.00178884404186522, 'loss': 2.5652145450568276e-11, 'time_step': 0.002129611840403488, 'td_error': 1.4478469749200292e-14, 'value_scale': 9.42646895709478e-06, 'environment': 0.0184095728160207} step=15378560
2023-01-10 10:21.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15378560.pt


Epoch 641/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:22.00 [info     ] DQN_20230110005015: epoch=641 step=15402589 epoch=641 metrics={'time_sample_batch': 5.656838729599947e-05, 'time_algorithm_update': 0.0017501401605565993, 'loss': 2.5642192065415937e-11, 'time_step': 0.002091430844803806, 'td_error': 2.099984118933058e-14, 'value_scale': 1.13863320489743e-05, 'environment': -0.28361252203663795} step=15402589
2023-01-10 10:22.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15402589.pt


Epoch 642/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:22.53 [info     ] DQN_20230110005015: epoch=642 step=15426618 epoch=642 metrics={'time_sample_batch': 5.724781426435265e-05, 'time_algorithm_update': 0.001763187597205643, 'loss': 2.609064001171524e-11, 'time_step': 0.0021020003415666184, 'td_error': 1.0991074509014044e-13, 'value_scale': -2.6138222189532865e-05, 'environment': -0.778200441378589} step=15426618
2023-01-10 10:22.53 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15426618.pt


Epoch 643/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:23.44 [info     ] DQN_20230110005015: epoch=643 step=15450647 epoch=643 metrics={'time_sample_batch': 5.7940169752767816e-05, 'time_algorithm_update': 0.001729718357581335, 'loss': 2.5679225499680945e-11, 'time_step': 0.002069677017299546, 'td_error': 2.6377872922142193e-13, 'value_scale': -4.0505680691173985e-05, 'environment': -0.20012902328997648} step=15450647
2023-01-10 10:23.44 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15450647.pt


Epoch 644/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:24.36 [info     ] DQN_20230110005015: epoch=644 step=15474676 epoch=644 metrics={'time_sample_batch': 5.623944923222757e-05, 'time_algorithm_update': 0.0017446862995919726, 'loss': 2.562063025371421e-11, 'time_step': 0.002082295828362897, 'td_error': 5.882048800177736e-14, 'value_scale': 1.9107737856954652e-05, 'environment': 0.25547243877683606} step=15474676
2023-01-10 10:24.36 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15474676.pt


Epoch 645/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:25.42 [info     ] DQN_20230110005015: epoch=645 step=15498705 epoch=645 metrics={'time_sample_batch': 8.419438234761272e-05, 'time_algorithm_update': 0.0021415689156353015, 'loss': 2.5925169220821433e-11, 'time_step': 0.0026215449061761454, 'td_error': 7.99412256588095e-14, 'value_scale': -2.227938406536591e-05, 'environment': -0.48453503449788365} step=15498705
2023-01-10 10:25.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15498705.pt


Epoch 646/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:26.38 [info     ] DQN_20230110005015: epoch=646 step=15522734 epoch=646 metrics={'time_sample_batch': 6.551047806992027e-05, 'time_algorithm_update': 0.0018789847893603579, 'loss': 2.624201833819248e-11, 'time_step': 0.0022645384498668265, 'td_error': 1.2310520395015283e-12, 'value_scale': 8.754774844156809e-05, 'environment': -0.42814230884016347} step=15522734
2023-01-10 10:26.38 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15522734.pt


Epoch 647/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:27.31 [info     ] DQN_20230110005015: epoch=647 step=15546763 epoch=647 metrics={'time_sample_batch': 5.962609639501764e-05, 'time_algorithm_update': 0.0017779331051696577, 'loss': 2.5901041415339285e-11, 'time_step': 0.0021450066220185357, 'td_error': 6.05930767403134e-14, 'value_scale': 1.9387417933356782e-05, 'environment': -0.16328124308986983} step=15546763
2023-01-10 10:27.31 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15546763.pt


Epoch 648/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:28.32 [info     ] DQN_20230110005015: epoch=648 step=15570792 epoch=648 metrics={'time_sample_batch': 7.618563546983187e-05, 'time_algorithm_update': 0.0020298783393868434, 'loss': 2.4829381466873744e-11, 'time_step': 0.002449386530320561, 'td_error': 7.795624551796719e-13, 'value_scale': -6.966683160634437e-05, 'environment': -0.23642914758710482} step=15570792
2023-01-10 10:28.32 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15570792.pt


Epoch 649/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:29.26 [info     ] DQN_20230110005015: epoch=649 step=15594821 epoch=649 metrics={'time_sample_batch': 6.083555299743909e-05, 'time_algorithm_update': 0.0018086939835559315, 'loss': 2.4860561761259765e-11, 'time_step': 0.002171291900765579, 'td_error': 6.790493984412023e-13, 'value_scale': -6.502211294043572e-05, 'environment': -0.20375272285687346} step=15594821
2023-01-10 10:29.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15594821.pt


Epoch 650/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:30.21 [info     ] DQN_20230110005015: epoch=650 step=15618850 epoch=650 metrics={'time_sample_batch': 5.972815530319687e-05, 'time_algorithm_update': 0.0018157375563822701, 'loss': 2.548377168681939e-11, 'time_step': 0.002177116114715496, 'td_error': 2.8749194371392993e-13, 'value_scale': -4.230154893628289e-05, 'environment': -0.27755303164064365} step=15618850
2023-01-10 10:30.21 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15618850.pt


Epoch 651/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:31.16 [info     ] DQN_20230110005015: epoch=651 step=15642879 epoch=651 metrics={'time_sample_batch': 6.212257079811419e-05, 'time_algorithm_update': 0.0018188730052776702, 'loss': 2.5178257238731733e-11, 'time_step': 0.0021979663547562066, 'td_error': 2.1684010946101293e-14, 'value_scale': 1.1584374285527015e-05, 'environment': -0.2745925997903985} step=15642879
2023-01-10 10:31.16 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15642879.pt


Epoch 652/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:32.12 [info     ] DQN_20230110005015: epoch=652 step=15666908 epoch=652 metrics={'time_sample_batch': 6.348820154157599e-05, 'time_algorithm_update': 0.0018541988815884845, 'loss': 2.5460208992218227e-11, 'time_step': 0.002252002647244641, 'td_error': 4.630915430623227e-15, 'value_scale': -5.290410638272907e-06, 'environment': -0.30086926710790596} step=15666908
2023-01-10 10:32.12 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15666908.pt


Epoch 653/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:33.06 [info     ] DQN_20230110005015: epoch=653 step=15690937 epoch=653 metrics={'time_sample_batch': 6.0839660754388946e-05, 'time_algorithm_update': 0.0018069428685968689, 'loss': 2.5041364757572217e-11, 'time_step': 0.0021736326673671923, 'td_error': 1.1097233707023664e-15, 'value_scale': -2.462415218863736e-06, 'environment': -0.31128770679036394} step=15690937
2023-01-10 10:33.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15690937.pt


Epoch 654/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:34.00 [info     ] DQN_20230110005015: epoch=654 step=15714966 epoch=654 metrics={'time_sample_batch': 6.0522778064274225e-05, 'time_algorithm_update': 0.001809262312566392, 'loss': 2.475920731719221e-11, 'time_step': 0.0021785462491496124, 'td_error': 4.086385797710788e-13, 'value_scale': 5.043818902174472e-05, 'environment': -0.18489581489047424} step=15714966
2023-01-10 10:34.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15714966.pt


Epoch 655/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:34.54 [info     ] DQN_20230110005015: epoch=655 step=15738995 epoch=655 metrics={'time_sample_batch': 5.962604678442646e-05, 'time_algorithm_update': 0.0017933413312549156, 'loss': 2.5298843880016665e-11, 'time_step': 0.0021638460158111103, 'td_error': 8.654903040529493e-15, 'value_scale': -7.3092215096995715e-06, 'environment': -0.0682976101715725} step=15738995
2023-01-10 10:34.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15738995.pt


Epoch 656/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:35.49 [info     ] DQN_20230110005015: epoch=656 step=15763024 epoch=656 metrics={'time_sample_batch': 6.2212018694015e-05, 'time_algorithm_update': 0.001834204781441917, 'loss': 2.6203717261458357e-11, 'time_step': 0.0022102928602163144, 'td_error': 9.612154179509266e-13, 'value_scale': -7.736261442033969e-05, 'environment': -0.2536370179737566} step=15763024
2023-01-10 10:35.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15763024.pt


Epoch 657/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:36.41 [info     ] DQN_20230110005015: epoch=657 step=15787053 epoch=657 metrics={'time_sample_batch': 5.650578865204626e-05, 'time_algorithm_update': 0.0017471134580770698, 'loss': 2.4602770242879955e-11, 'time_step': 0.002089636399954405, 'td_error': 3.0310292341063997e-14, 'value_scale': -1.371352969391129e-05, 'environment': -0.17128942124768992} step=15787053
2023-01-10 10:36.41 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15787053.pt


Epoch 658/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:37.34 [info     ] DQN_20230110005015: epoch=658 step=15811082 epoch=658 metrics={'time_sample_batch': 5.822591683585683e-05, 'time_algorithm_update': 0.0017629897402458917, 'loss': 2.614848161196723e-11, 'time_step': 0.002110913578820708, 'td_error': 7.01462622421134e-13, 'value_scale': -6.60864877462729e-05, 'environment': -0.2770027535689488} step=15811082
2023-01-10 10:37.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15811082.pt


Epoch 659/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:38.26 [info     ] DQN_20230110005015: epoch=659 step=15835111 epoch=659 metrics={'time_sample_batch': 5.737617670797648e-05, 'time_algorithm_update': 0.001763445721111562, 'loss': 2.5517501339598184e-11, 'time_step': 0.0021083478579544397, 'td_error': 8.561260076204634e-13, 'value_scale': 7.300564019225584e-05, 'environment': -0.30195961166799334} step=15835111
2023-01-10 10:38.26 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15835111.pt


Epoch 660/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:39.24 [info     ] DQN_20230110005015: epoch=660 step=15859140 epoch=660 metrics={'time_sample_batch': 6.764166969014512e-05, 'time_algorithm_update': 0.0018959505697221234, 'loss': 2.5290662299640207e-11, 'time_step': 0.002298816937454492, 'td_error': 1.3560944952327126e-13, 'value_scale': -2.903796835410054e-05, 'environment': -0.18250216493538507} step=15859140
2023-01-10 10:39.24 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15859140.pt


Epoch 661/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:40.18 [info     ] DQN_20230110005015: epoch=661 step=15883169 epoch=661 metrics={'time_sample_batch': 6.069158306182946e-05, 'time_algorithm_update': 0.0018209682986741159, 'loss': 2.4282075362929534e-11, 'time_step': 0.0021993504108732335, 'td_error': 1.1101418328052744e-12, 'value_scale': -8.312706675223288e-05, 'environment': -0.1954893704915715} step=15883169
2023-01-10 10:40.19 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15883169.pt


Epoch 662/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:41.11 [info     ] DQN_20230110005015: epoch=662 step=15907198 epoch=662 metrics={'time_sample_batch': 5.878585173428949e-05, 'time_algorithm_update': 0.001779615946033136, 'loss': 2.6051654277615402e-11, 'time_step': 0.002129125745908971, 'td_error': 2.686157468390656e-13, 'value_scale': -4.087354919499899e-05, 'environment': -0.10307092353314265} step=15907198
2023-01-10 10:41.11 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15907198.pt


Epoch 663/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:42.08 [info     ] DQN_20230110005015: epoch=663 step=15931227 epoch=663 metrics={'time_sample_batch': 6.395595996159284e-05, 'time_algorithm_update': 0.0018583694951573662, 'loss': 2.483977799031054e-11, 'time_step': 0.0022615730958443526, 'td_error': 3.322145461847131e-13, 'value_scale': 4.546710684307291e-05, 'environment': 0.02985677358065892} step=15931227
2023-01-10 10:42.08 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15931227.pt


Epoch 664/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:43.00 [info     ] DQN_20230110005015: epoch=664 step=15955256 epoch=664 metrics={'time_sample_batch': 5.70263426631988e-05, 'time_algorithm_update': 0.0017645776958926755, 'loss': 2.5445785073317225e-11, 'time_step': 0.002109327309934024, 'td_error': 4.5646042248569244e-14, 'value_scale': -1.683393932583239e-05, 'environment': -0.43744849091012705} step=15955256
2023-01-10 10:43.00 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15955256.pt


Epoch 665/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:43.54 [info     ] DQN_20230110005015: epoch=665 step=15979285 epoch=665 metrics={'time_sample_batch': 5.9863006812147274e-05, 'time_algorithm_update': 0.0017921127844970073, 'loss': 2.5647616132901332e-11, 'time_step': 0.0021475530046763613, 'td_error': 4.676085543894298e-13, 'value_scale': -5.395604113003382e-05, 'environment': -0.14281705262462954} step=15979285
2023-01-10 10:43.54 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_15979285.pt


Epoch 666/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:44.49 [info     ] DQN_20230110005015: epoch=666 step=16003314 epoch=666 metrics={'time_sample_batch': 6.246713619810836e-05, 'time_algorithm_update': 0.0018315692386296194, 'loss': 2.490293717100707e-11, 'time_step': 0.002206336435413801, 'td_error': 9.426595662095121e-14, 'value_scale': -2.4212149224816848e-05, 'environment': -0.4739702175003989} step=16003314
2023-01-10 10:44.49 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16003314.pt


Epoch 667/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:45.42 [info     ] DQN_20230110005015: epoch=667 step=16027343 epoch=667 metrics={'time_sample_batch': 5.768941798069845e-05, 'time_algorithm_update': 0.001774594580280314, 'loss': 2.4922233582361052e-11, 'time_step': 0.0021302730106742863, 'td_error': 1.3264171086560494e-13, 'value_scale': -2.8730459170473082e-05, 'environment': -0.1967798649560702} step=16027343
2023-01-10 10:45.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16027343.pt


Epoch 668/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:46.35 [info     ] DQN_20230110005015: epoch=668 step=16051372 epoch=668 metrics={'time_sample_batch': 5.7676429927927055e-05, 'time_algorithm_update': 0.0017674369030942572, 'loss': 2.446074617666094e-11, 'time_step': 0.0021174652924018334, 'td_error': 1.5143877727316458e-13, 'value_scale': -3.069334430205935e-05, 'environment': -0.48436419399096914} step=16051372
2023-01-10 10:46.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16051372.pt


Epoch 669/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:47.29 [info     ] DQN_20230110005015: epoch=669 step=16075401 epoch=669 metrics={'time_sample_batch': 5.9284795371923234e-05, 'time_algorithm_update': 0.0018300250692906126, 'loss': 2.4757490695742924e-11, 'time_step': 0.0021809613125725797, 'td_error': 2.0859428898406174e-14, 'value_scale': -1.137730260160801e-05, 'environment': -0.23538583272694744} step=16075401
2023-01-10 10:47.29 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16075401.pt


Epoch 670/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:48.22 [info     ] DQN_20230110005015: epoch=670 step=16099430 epoch=670 metrics={'time_sample_batch': 5.821806844033186e-05, 'time_algorithm_update': 0.0017810160263711143, 'loss': 2.561952088429095e-11, 'time_step': 0.0021332149484977227, 'td_error': 6.44058334244433e-13, 'value_scale': 6.332474308977793e-05, 'environment': -0.18041686042575036} step=16099430
2023-01-10 10:48.22 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16099430.pt


Epoch 671/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:49.18 [info     ] DQN_20230110005015: epoch=671 step=16123459 epoch=671 metrics={'time_sample_batch': 6.400180014784483e-05, 'time_algorithm_update': 0.0018366217598337039, 'loss': 2.6213893227820192e-11, 'time_step': 0.0022160882702569914, 'td_error': 2.7673283107965434e-12, 'value_scale': 0.0001312682447330902, 'environment': -1.0724672974626768} step=16123459
2023-01-10 10:49.18 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16123459.pt


Epoch 672/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:50.13 [info     ] DQN_20230110005015: epoch=672 step=16147488 epoch=672 metrics={'time_sample_batch': 6.426234505061348e-05, 'time_algorithm_update': 0.0018490520804169185, 'loss': 2.4390093622417566e-11, 'time_step': 0.002238452496553034, 'td_error': 2.62667213647548e-13, 'value_scale': 4.0436673826622636e-05, 'environment': -0.0036818521242474754} step=16147488
2023-01-10 10:50.13 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16147488.pt


Epoch 673/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:51.06 [info     ] DQN_20230110005015: epoch=673 step=16171517 epoch=673 metrics={'time_sample_batch': 5.894645114006326e-05, 'time_algorithm_update': 0.0017630944979702312, 'loss': 2.53453536905177e-11, 'time_step': 0.0021186452009361472, 'td_error': 2.887527816925912e-13, 'value_scale': -4.23975093602924e-05, 'environment': -0.33109260962541953} step=16171517
2023-01-10 10:51.06 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16171517.pt


Epoch 674/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:52.01 [info     ] DQN_20230110005015: epoch=674 step=16195546 epoch=674 metrics={'time_sample_batch': 6.046720428003235e-05, 'time_algorithm_update': 0.0018307597029248329, 'loss': 2.485652745360078e-11, 'time_step': 0.002194639339524017, 'td_error': 2.203424921289819e-14, 'value_scale': 1.168930514340019e-05, 'environment': -0.23268872767305143} step=16195546
2023-01-10 10:52.01 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16195546.pt


Epoch 675/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:52.56 [info     ] DQN_20230110005015: epoch=675 step=16219575 epoch=675 metrics={'time_sample_batch': 6.230669554622636e-05, 'time_algorithm_update': 0.0018491426197458254, 'loss': 2.630536811810464e-11, 'time_step': 0.002222993429533933, 'td_error': 6.96815893919587e-17, 'value_scale': -1.2967633561399828e-07, 'environment': -0.4309272303607812} step=16219575
2023-01-10 10:52.57 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16219575.pt


Epoch 676/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:53.52 [info     ] DQN_20230110005015: epoch=676 step=16243604 epoch=676 metrics={'time_sample_batch': 6.247381378368143e-05, 'time_algorithm_update': 0.0018596647383940592, 'loss': 2.5656422262590706e-11, 'time_step': 0.0022399456562821876, 'td_error': 3.9467997584028204e-13, 'value_scale': 4.9569007670092845e-05, 'environment': -0.2941373649508736} step=16243604
2023-01-10 10:53.52 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16243604.pt


Epoch 677/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:54.48 [info     ] DQN_20230110005015: epoch=677 step=16267633 epoch=677 metrics={'time_sample_batch': 6.099459463064975e-05, 'time_algorithm_update': 0.001854558905648691, 'loss': 2.5028628062074948e-11, 'time_step': 0.0022205125923891768, 'td_error': 2.0819284707151116e-13, 'value_scale': 3.598594079116601e-05, 'environment': -0.3723143694080742} step=16267633
2023-01-10 10:54.48 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16267633.pt


Epoch 678/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:55.42 [info     ] DQN_20230110005015: epoch=678 step=16291662 epoch=678 metrics={'time_sample_batch': 6.008940970606461e-05, 'time_algorithm_update': 0.0017875559921642482, 'loss': 2.4513326180727165e-11, 'time_step': 0.002147785628738413, 'td_error': 8.085408953366963e-13, 'value_scale': 7.095000534295105e-05, 'environment': -0.35397865822916136} step=16291662
2023-01-10 10:55.42 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16291662.pt


Epoch 679/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:56.43 [info     ] DQN_20230110005015: epoch=679 step=16315691 epoch=679 metrics={'time_sample_batch': 7.515172098324815e-05, 'time_algorithm_update': 0.0020095556186842875, 'loss': 2.486625715599374e-11, 'time_step': 0.002469200692852912, 'td_error': 2.1014499922679414e-13, 'value_scale': 3.615998692463232e-05, 'environment': -0.6557194948245324} step=16315691
2023-01-10 10:56.43 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16315691.pt


Epoch 680/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:57.39 [info     ] DQN_20230110005015: epoch=680 step=16339720 epoch=680 metrics={'time_sample_batch': 6.559110520270896e-05, 'time_algorithm_update': 0.001861925681554461, 'loss': 2.594451936830829e-11, 'time_step': 0.002237579221160697, 'td_error': 1.9028305300772328e-12, 'value_scale': -0.0001088517016476516, 'environment': -0.2666534064982393} step=16339720
2023-01-10 10:57.39 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16339720.pt


Epoch 681/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:58.34 [info     ] DQN_20230110005015: epoch=681 step=16363749 epoch=681 metrics={'time_sample_batch': 6.226622322594024e-05, 'time_algorithm_update': 0.0018277033333117766, 'loss': 2.426780228382269e-11, 'time_step': 0.0022102945072879415, 'td_error': 8.136935375094264e-16, 'value_scale': -2.1269323429179145e-06, 'environment': -0.4317123303206148} step=16363749
2023-01-10 10:58.34 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16363749.pt


Epoch 682/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 10:59.33 [info     ] DQN_20230110005015: epoch=682 step=16387778 epoch=682 metrics={'time_sample_batch': 7.05894714854591e-05, 'time_algorithm_update': 0.0019503607871584135, 'loss': 2.5286426517614335e-11, 'time_step': 0.0023748920206829345, 'td_error': 1.5193206373213468e-12, 'value_scale': 9.726312971051816e-05, 'environment': 0.03955595799276087} step=16387778
2023-01-10 10:59.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16387778.pt


Epoch 683/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 11:00.35 [info     ] DQN_20230110005015: epoch=683 step=16411807 epoch=683 metrics={'time_sample_batch': 7.90347320329305e-05, 'time_algorithm_update': 0.002049667498181239, 'loss': 2.5480900902781623e-11, 'time_step': 0.0024780699354347412, 'td_error': 2.502711135680337e-13, 'value_scale': -3.946600347041129e-05, 'environment': -0.35232220404939885} step=16411807
2023-01-10 11:00.35 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16411807.pt


Epoch 684/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 11:01.33 [info     ] DQN_20230110005015: epoch=684 step=16435836 epoch=684 metrics={'time_sample_batch': 6.569575378374788e-05, 'time_algorithm_update': 0.001893888197913986, 'loss': 2.49215098974287e-11, 'time_step': 0.0022937138531358755, 'td_error': 6.543239343145574e-14, 'value_scale': 2.0165878662070245e-05, 'environment': -0.18310714309243337} step=16435836
2023-01-10 11:01.33 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16435836.pt


Epoch 685/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 11:02.30 [info     ] DQN_20230110005015: epoch=685 step=16459865 epoch=685 metrics={'time_sample_batch': 6.822541767234505e-05, 'time_algorithm_update': 0.0019194365113290232, 'loss': 2.521685299835046e-11, 'time_step': 0.002329907170633504, 'td_error': 3.2081333176450322e-15, 'value_scale': -4.324610328895776e-06, 'environment': 0.49014486319020295} step=16459865
2023-01-10 11:02.30 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16459865.pt


Epoch 686/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 11:03.28 [info     ] DQN_20230110005015: epoch=686 step=16483894 epoch=686 metrics={'time_sample_batch': 6.474330981000294e-05, 'time_algorithm_update': 0.001904084861161949, 'loss': 2.4033623060487664e-11, 'time_step': 0.00230990449777678, 'td_error': 2.2097166133866646e-13, 'value_scale': -3.7084588418884316e-05, 'environment': 0.027686299896465705} step=16483894
2023-01-10 11:03.28 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16483894.pt


Epoch 687/1000:   0%|          | 0/24029 [00:00<?, ?it/s]

2023-01-10 11:04.25 [info     ] DQN_20230110005015: epoch=687 step=16507923 epoch=687 metrics={'time_sample_batch': 6.740098886808567e-05, 'time_algorithm_update': 0.001888260580814804, 'loss': 2.5109811625988576e-11, 'time_step': 0.0023010940536676194, 'td_error': 5.9149419517186655e-15, 'value_scale': 6.0232386048019344e-06, 'environment': -0.5771945363741058} step=16507923
2023-01-10 11:04.25 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20230110005015/model_16507923.pt


Epoch 688/1000:   0%|          | 0/24029 [00:00<?, ?it/s]