In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import dqn
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    def __init__(self):
        super().__init__()

        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=8, stride=4, padding=0),  
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=0), 
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0), 
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
            nn.ReLU(), 
            nn.Conv2d(64, 512, kernel_size=2, stride=1, padding=0),
            nn.ReLU(),
            nn.Flatten(),
        )

    def forward(self, x):
        return self.cnn(x)

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        features_dim = 512
        self.encoder = VisualEncoder()
        self.encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AnnaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.qvalue_head = nn.Linear(features_dim, num_outputs)
        
        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.qvalue_head.cuda()
        
    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        obs = input_dict['obs'].permute(0, 3, 1, 2).float() / 255.0
        if self.use_cuda:
            obs.cuda()
            
        features = self.encoder(obs)
        qvalues = self.qvalue_head(features)
        return qvalues, state

In [4]:
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

class RewardWrapper(gym.RewardWrapper):
    def __init__(self, env):
        super().__init__(env)
    
    def reward(self, rew):
        if rew == 0:
            rew = -0.05
        return rew
    
def env_creator(env_config):
    env = gym.make('IGLUSilentBuilder-v0', max_steps=500)
    env.update_taskset(TaskSet(preset=['C8']))
    env = PovOnlyWrapper(env)
    env = SelectAndPlace(env)
    env = Discretization(env, flat_action_space('human-level'))
    #env = RewardWrapper(env)
    return env

from ray.tune.registry import register_env
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.dqn import ApexTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

analysis = tune.run(ApexTrainer, 
         config={
             "env": "my_env", 
             "framework": "torch",
             "gamma": 0.95,
             "num_gpus": 1,
             "num_workers": 3,
             "buffer_size": 200000,
             "learning_starts": 5000,
             "train_batch_size": 5000,
             "target_network_update_freq": 10000,
             "prioritized_replay_alpha": 0.5,
             "final_prioritized_replay_beta": 1.0,
             "min_iter_time_s": 10,
             "rollout_fragment_length": 8,
             "collect_metrics_timeout": 1800,
             
             "v_min": -20.0,
             "v_max": 20.0,
             
             "exploration_config": {
                  "initial_epsilon": 1,
                  "epsilon_timesteps": 500000,
                  "final_epsilon": 0.05,
              },
             "model": {
                    # Specify our custom model from above.
                    "custom_model": "my_torch_model",
                    # Extra kwargs to be passed to your model's c'tor.
                    "custom_model_config": {},
              },
             "logger_config": {
                  "wandb": {
                      "project": "IGLU-Minecraft",
                      "name": "APEX C8 pretrained (AnnaCNN) gamma: 0.95"
                  }
              }

        },
        loggers=[WandbLogger])

2021-10-13 08:14:27,555	INFO wandb.py:170 -- Already logged into W&B.
2021-10-13 08:14:27,568	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
APEX_my_env_95522_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.4 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=152)[0m 2021-10-13 08:14:31,038	INFO dqn.py:188 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=152)[0m 2021-10-13 08:14:31,038	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=146)[0m 2021-10-13 08:15:32,796	INFO replay_buffer.py:46 -- Estimated max memory usage for replay buffer is 1.2314 GB (50000.0 batches of size 1, 24628 bytes each), available system memory is 50.466770944 GB
[2m[36m(pid=151)[0m 2021-10-13 08:15:33,163	INFO replay_buffer.py:46 -- Estimated max memory usage for replay buffer is 1.23145 GB (50000.0 batches of size 1, 24629 bytes each), available system memory is 50.466770944 GB
[2m[36m(pid=145)[0m 2021-10-13 08:15:33,381	INFO replay_buffer.py:46 -- Estimated max memory usage for replay buffer is 1.23145 GB (50000.0 batc

Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 25040
  custom_metrics: {}
  date: 2021-10-13_08-21-10
  done: false
  episode_len_mean: 218.6216216216216
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.6756756756756757
  episode_reward_min: -1.0
  episodes_this_iter: 37
  episodes_total: 37
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 24215
    - cur_epsilon: 0.016190861620062107
      last_timestep: 24863
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 24551
    last_target_update_ts: 6525000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.2605246603488922
        max_q: 10.484436988830566
        mean_q: 1.2949919700622559
        min_q: -0.9507524967193604
    learner_queue:
      size_count: 1307
      size_mean: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,1,392.652,25040,0.675676,3,-1,218.622




Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 50056
  custom_metrics: {}
  date: 2021-10-13_08-27-59
  done: false
  episode_len_mean: 204.45569620253164
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.8734177215189873
  episode_reward_min: -2.0
  episodes_this_iter: 42
  episodes_total: 79
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 49103
    - cur_epsilon: 0.016190861620062107
      last_timestep: 49847
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 49303
    last_target_update_ts: 15270000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.2432873249053955
        max_q: 95.32525634765625
        mean_q: 2.972226619720459
        min_q: -1.3175201416015625
    learner_queue:
      size_count: 3056
      size_mean: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,2,801.548,50056,0.873418,7,-2,204.456




Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 75056
  custom_metrics: {}
  date: 2021-10-13_08-35-57
  done: false
  episode_len_mean: 184.78
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.92
  episode_reward_min: -2.0
  episodes_this_iter: 50
  episodes_total: 129
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 74503
    - cur_epsilon: 0.016190861620062107
      last_timestep: 75055
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 74727
    last_target_update_ts: 25230000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.1525847166776657
        max_q: 75.24273681640625
        mean_q: 3.410060405731201
        min_q: -0.6062561273574829
    learner_queue:
      size_count: 5047
      size_mean: 0.54
      size_quantiles:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,3,1279.98,75056,0.92,7,-2,184.78


[34m[1mwandb[0m: Network error (ReadTimeout), entering retry loop.
[34m[1mwandb[0m: Network error resolved after 0:00:41.961330, resuming normal operation.


Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 100056
  custom_metrics: {}
  date: 2021-10-13_08-42-51
  done: false
  episode_len_mean: 194.34
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.47
  episode_reward_min: -2.0
  episodes_this_iter: 43
  episodes_total: 172
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 99487
    - cur_epsilon: 0.016190861620062107
      last_timestep: 99655
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 99399
    last_target_update_ts: 33525000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.18572236597537994
        max_q: 34.8173828125
        mean_q: 4.053640365600586
        min_q: 0.14973044395446777
    learner_queue:
      size_count: 6708
      size_mean: 0.7
      size_quantiles:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,4,1693.24,100056,0.47,3,-2,194.34




Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 125056
  custom_metrics: {}
  date: 2021-10-13_08-50-36
  done: false
  episode_len_mean: 202.75
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 0.64
  episode_reward_min: -2.0
  episodes_this_iter: 47
  episodes_total: 219
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 124919
    - cur_epsilon: 0.016190861620062107
      last_timestep: 124895
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 124791
    last_target_update_ts: 42750000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.11041738092899323
        max_q: 158.53546142578125
        mean_q: 4.642679691314697
        min_q: -0.5573549270629883
    learner_queue:
      size_count: 8551
      size_mean: 0.54
      size_quan

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,5,2158.63,125056,0.64,8,-2,202.75




Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 150104
  custom_metrics: {}
  date: 2021-10-13_08-58-28
  done: false
  episode_len_mean: 179.42
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 0.8
  episode_reward_min: -3.0
  episodes_this_iter: 48
  episodes_total: 267
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 150015
    - cur_epsilon: 0.016190861620062107
      last_timestep: 149127
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 150047
    last_target_update_ts: 51945000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.1411726027727127
        max_q: 136.02589416503906
        mean_q: 5.371298313140869
        min_q: -1.2066712379455566
    learner_queue:
      size_count: 10391
      size_mean: 0.62
      size_quant

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,6,2630.68,150104,0.8,8,-3,179.42




Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 175120
  custom_metrics: {}
  date: 2021-10-13_09-06-18
  done: false
  episode_len_mean: 165.22
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 1.75
  episode_reward_min: -3.0
  episodes_this_iter: 48
  episodes_total: 315
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 174943
    - cur_epsilon: 0.016190861620062107
      last_timestep: 174863
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 174943
    last_target_update_ts: 60885000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.1705997735261917
        max_q: 134.10540771484375
        mean_q: 5.339540958404541
        min_q: -1.3896775245666504
    learner_queue:
      size_count: 12179
      size_mean: 0.52
      size_quan

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,7,3100.75,175120,1.75,9,-3,165.22




Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 200120
  custom_metrics: {}
  date: 2021-10-13_09-13-46
  done: false
  episode_len_mean: 161.68
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 2.43
  episode_reward_min: -3.0
  episodes_this_iter: 45
  episodes_total: 360
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 199847
    - cur_epsilon: 0.016190861620062107
      last_timestep: 199447
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 198671
    last_target_update_ts: 69360000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.3477717638015747
        max_q: 416.7607421875
        mean_q: 6.609777927398682
        min_q: -2.088261365890503
    learner_queue:
      size_count: 13874
      size_mean: 0.6
      size_quantiles:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,8,3547.91,200120,2.43,9,-3,161.68




Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 225120
  custom_metrics: {}
  date: 2021-10-13_09-20-39
  done: false
  episode_len_mean: 171.32
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 1.58
  episode_reward_min: -3.0
  episodes_this_iter: 46
  episodes_total: 406
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 224743
    - cur_epsilon: 0.016190861620062107
      last_timestep: 224783
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 224351
    last_target_update_ts: 77130000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 6.995969295501709
        max_q: 7577.2861328125
        mean_q: 272.8641662597656
        min_q: -0.6759246587753296
    learner_queue:
      size_count: 15427
      size_mean: 0.68
      size_quantile

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,9,3961.56,225120,1.58,9,-3,171.32




Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 250152
  custom_metrics: {}
  date: 2021-10-13_09-25-59
  done: false
  episode_len_mean: 206.1
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.7
  episode_reward_min: -3.0
  episodes_this_iter: 34
  episodes_total: 440
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 249951
    - cur_epsilon: 0.016190861620062107
      last_timestep: 249775
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 249671
    last_target_update_ts: 83265000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 119.08619689941406
        max_q: 218544.6875
        mean_q: 9522.90234375
        min_q: -150.47955322265625
    learner_queue:
      size_count: 16654
      size_mean: 0.6
      size_quantiles:
      -

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,10,4281.43,250152,0.7,6,-3,206.1


Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 275152
  custom_metrics: {}
  date: 2021-10-13_09-29-08
  done: false
  episode_len_mean: 272.14
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.36
  episode_reward_min: -1.0
  episodes_this_iter: 19
  episodes_total: 459
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 274503
    - cur_epsilon: 0.016190861620062107
      last_timestep: 274527
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 274871
    last_target_update_ts: 86835000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 201.9309844970703
        max_q: 289185.03125
        mean_q: 9569.134765625
        min_q: -154.4058837890625
    learner_queue:
      size_count: 17370
      size_mean: 0.68
      size_quantiles:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,11,4470.76,275152,0.36,6,-1,272.14




Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 300168
  custom_metrics: {}
  date: 2021-10-13_09-32-36
  done: false
  episode_len_mean: 316.65
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.02
  episode_reward_min: 0.0
  episodes_this_iter: 21
  episodes_total: 480
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 299487
    - cur_epsilon: 0.016190861620062107
      last_timestep: 300047
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 298607
    last_target_update_ts: 90870000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 2721.242431640625
        max_q: 5008807.5
        mean_q: 375669.53125
        min_q: -20791.80859375
    learner_queue:
      size_count: 18179
      size_mean: 0.78
      size_quantiles:
      - 0.0
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,12,4677.88,300168,0.02,2,0,316.65




Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 325168
  custom_metrics: {}
  date: 2021-10-13_09-36-03
  done: false
  episode_len_mean: 388.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 19
  episodes_total: 499
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 324215
    - cur_epsilon: 0.016190861620062107
      last_timestep: 324687
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 324855
    last_target_update_ts: 95055000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 20195.0703125
        max_q: 11120569.0
        mean_q: 1516449.125
        min_q: -52764.79296875
    learner_queue:
      size_count: 19012
      size_mean: 0.56
      size_quantiles:
      - 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,13,4885.36,325168,0,0,0,388.63


Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 350200
  custom_metrics: {}
  date: 2021-10-13_09-39-24
  done: false
  episode_len_mean: 459.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 19
  episodes_total: 518
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 349023
    - cur_epsilon: 0.016190861620062107
      last_timestep: 349999
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 349999
    last_target_update_ts: 98970000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 26618.138671875
        max_q: 91238752.0
        mean_q: 5187430.5
        min_q: -436379.09375
    learner_queue:
      size_count: 19798
      size_mean: 0.76
      size_quantiles:
      - 0.0
      - 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,14,5086.53,350200,0,0,0,459.76




Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 375208
  custom_metrics: {}
  date: 2021-10-13_09-42-57
  done: false
  episode_len_mean: 492.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 21
  episodes_total: 539
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 374735
    - cur_epsilon: 0.016190861620062107
      last_timestep: 375167
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 374791
    last_target_update_ts: 103185000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 76385.640625
        max_q: 69443712.0
        mean_q: 4168082.0
        min_q: -2478.599853515625
    learner_queue:
      size_count: 20638
      size_mean: 0.6
      size_quantiles:
      - 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,15,5298.79,375208,0,0,0,492.95




Result for APEX_my_env_95522_00000:
  agent_timesteps_total: 400208
  custom_metrics: {}
  date: 2021-10-13_09-46-58
  done: false
  episode_len_mean: 490.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 18
  episodes_total: 557
  experiment_id: 761a764a1328456a9969c858fe012428
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 399607
    - cur_epsilon: 0.016190861620062107
      last_timestep: 399831
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 399863
    last_target_update_ts: 107295000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 5459.8154296875
        max_q: 32462.51171875
        mean_q: 406.15838623046875
        min_q: -1212.914794921875
    learner_queue:
      size_count: 21462
      size_mean: 0.44
      size_quantiles:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_95522_00000,RUNNING,192.168.3.5:152,16,5540.53,400208,0,0,0,490.38
