In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import dqn
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder: five Conv2d + ReLU stages, then a flatten.

    The layers live in ``self.cnn`` (an ``nn.Sequential``), so state-dict keys
    have the form ``cnn.<index>.weight`` / ``cnn.<index>.bias`` with the conv
    layers at even indices 0, 2, 4, 6 and 8.

    Input is a float tensor with 3 channels in channels-first layout
    ``(N, 3, H, W)``. For 64x64 inputs the flattened output is ``(N, 512)``.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, kernel_size, stride) per conv stage, in order.
        conv_specs = (
            (3, 32, 8, 4),
            (32, 64, 4, 2),
            (64, 64, 3, 1),
            (64, 64, 3, 1),
            (64, 512, 2, 1),
        )

        stages = []
        for channels_in, channels_out, kernel, step in conv_specs:
            stages.append(
                nn.Conv2d(channels_in, channels_out, kernel_size=kernel, stride=step, padding=0)
            )
            stages.append(nn.ReLU())
        stages.append(nn.Flatten())

        # Attribute name and layer order are kept so saved weights still match.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the conv stack and return the flattened features."""
        return self.cnn(x)

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model: pretrained ``VisualEncoder`` plus a linear Q-value head.

    The encoder weights are loaded from a fixed checkpoint path at construction
    time. ``forward`` returns one value per action (``num_outputs`` wide) and
    passes the recurrent ``state`` through unchanged.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoder and head; arguments are forwarded to ``TorchModelV2``.

        Raises whatever ``torch.load`` / ``load_state_dict`` raise if the
        checkpoint file is missing or does not match ``VisualEncoder``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        # Must equal the flattened encoder output size.
        # NOTE(review): 512 assumes the conv stack reduces the image to 1x1 - confirm for the env's frame size.
        features_dim = 512
        self.encoder = VisualEncoder()
        # Load on CPU first so the checkpoint can be read on machines without a GPU.
        self.encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AnnaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.qvalue_head = nn.Linear(features_dim, num_outputs)

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.qvalue_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute Q-values for a batch of image observations.

        Args:
            input_dict: mapping with an ``'obs'`` tensor; it is permuted from
                channels-last to channels-first and divided by 255.
                NOTE(review): presumably (batch, H, W, 3) with values in 0..255 - confirm.
            state: recurrent state, returned unchanged.
            seq_lens: unused.

        Returns:
            ``(qvalues, state)`` where ``qvalues`` comes from the linear head.
        """
        obs = input_dict['obs'].permute(0, 3, 1, 2).float() / 255.0
        # Tensor.cuda()/.to() return a new tensor rather than moving in place, so
        # the result must be assigned. Follow the encoder's actual device instead
        # of a flag so this stays correct if the model is moved after construction.
        obs = obs.to(next(self.encoder.parameters()).device)

        features = self.encoder(obs)
        qvalues = self.qvalue_head(features)
        return qvalues, state

In [4]:
# Register the custom model under the name referenced by "custom_model" in the trainer config.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os
# Order CUDA devices by PCI bus ID and expose only device "0" to this process.
# NOTE(review): these are set after `import torch`; presumably they still take effect
# because CUDA has not been initialised yet at this point - confirm.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

class RewardWrapper(gym.RewardWrapper):
    """Reward shaping wrapper: a reward of exactly 0 becomes a -0.01 penalty.

    Every other reward value is passed through unchanged.
    """

    def reward(self, rew):
        """Return ``-0.01`` when ``rew == 0``, otherwise ``rew`` itself."""
        return -0.01 if rew == 0 else rew
    
def env_creator(env_config):
    """Build the wrapped IGLU environment used for training and evaluation.

    Args:
        env_config: mapping of optional overrides (RLlib passes its env config
            here). Recognised keys, all optional:
              - ``"max_steps"``: episode step limit (default ``500``).
              - ``"task_preset"``: task ids for the ``TaskSet`` (default ``['C8']``).
              - ``"action_space"``: name given to ``flat_action_space``
                (default ``'human-level'``).
            An empty mapping or ``None`` reproduces the original hard-coded setup.

    Returns:
        The environment wrapped, in order, by ``PovOnlyWrapper``,
        ``SelectAndPlace``, ``Discretization`` and ``RewardWrapper``.
    """
    # Tolerate None so the creator can also be called directly outside RLlib.
    settings = env_config or {}
    max_steps = settings.get("max_steps", 500)
    task_preset = settings.get("task_preset", ['C8'])
    action_space_name = settings.get("action_space", 'human-level')

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    # Wrapper order matters: each wrapper sees the output of the previous one.
    env = PovOnlyWrapper(env)
    env = SelectAndPlace(env)
    env = Discretization(env, flat_action_space(action_space_name))
    env = RewardWrapper(env)
    return env

from ray.tune.registry import register_env
# Make env_creator available to trainers under the name "my_env" (used as config["env"]).
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.dqn import ApexTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Launch a single Ape-X DQN trial through Tune, logging to Weights & Biases.
# No `stop` criterion is passed, so as written the trial runs until it is interrupted.
analysis = tune.run(ApexTrainer, 
         config={
             # Environment name registered via register_env.
             "env": "my_env", 
             "framework": "torch",
             #"gamma": 0.99,
             "num_gpus": 1,
             "num_workers": 2,
             "buffer_size": 250_000,
             "learning_starts": 10_000,
             "train_batch_size": 1000,
             "target_network_update_freq": 5000,
             #"prioritized_replay_alpha": 0.5,
             #"final_prioritized_replay_beta": 1.0,
             "min_iter_time_s": 30, 
             "rollout_fragment_length": 4,
             "collect_metrics_timeout": 1800,
             
             # NOTE(review): presumably only used by distributional DQN (num_atoms > 1),
             # which is not enabled in this config - confirm these have any effect.
             "v_min": -10.0,
             "v_max": 100.0,
             
             # NOTE(review): the logged exploration_infos report cur_epsilon fixed at 0.4 and
             # ~0.00066 on every iteration, so this schedule does not appear to be applied to
             # the rollout workers - presumably Ape-X's per-worker epsilon takes over; confirm.
             "exploration_config": {
                  "initial_epsilon": 1,
                  "epsilon_timesteps": 100_000,
                  "final_epsilon": 0.05,
              },
             "model": {
                    # Specify our custom model from above.
                    "custom_model": "my_torch_model",
                    # Extra kwargs to be passed to your model's c'tor.
                    "custom_model_config": {},
              },
             # Settings for the WandbLogger passed in `loggers` below.
             "logger_config": {
                  "wandb": {
                      "project": "IGLU-Minecraft",
                      "name": "APEX C8 pretrained (AnnaCNN) r: -0.01"
                  }
              },
              #"training_intensity": 50,
              "lr": 1e-5,
             
              # Evaluation: one episode on a dedicated worker every 5 training iterations,
              # with exploration disabled.
              "evaluation_num_workers": 1,
              "evaluation_interval": 5,
              "evaluation_num_episodes": 1,
              "evaluation_config": {
                  #"input": "sampler",
                  "explore": False,  
              },
        },
        loggers=[WandbLogger])

2021-10-20 08:49:14,970	INFO wandb.py:170 -- Already logged into W&B.
2021-10-20 08:49:14,983	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
APEX_my_env_9a6b4_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.5 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=162)[0m 2021-10-20 08:49:18,485	INFO dqn.py:188 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=162)[0m 2021-10-20 08:49:18,485	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=162)[0m 2021-10-20 08:49:28,695	INFO trainable.py:109 -- Trainable.setup took 12.727 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=164)[0m 2021-10-20 08:50:25,038	INFO replay_buffer.py:46 -- Estimated max memory usage for replay buffer is 1.53925 GB (62500.0 batches of size 1, 24628 bytes each), available system memory is 50.46360064 GB
[2m[36m(pid=168)[0m 2021-10-20 08:50:25,270	INFO replay_buffer.py:46 -- Estimated max memory usage for replay buffer is 1.5390625 GB (62500.0 batch

Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-20_08-57-36
  done: false
  episode_len_mean: 194.7741935483871
  episode_media: {}
  episode_reward_max: -0.09000000000000141
  episode_reward_mean: -1.9091935483870903
  episode_reward_min: -4.999999999999938
  episodes_this_iter: 62
  episodes_total: 62
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 24655
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 24967
    last_target_update_ts: 6096000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.7736769914627075
        max_q: 23.064054489135742
        mean_q: 7.344659328460693
        min_q: 3.002164840698242
    learner_queue:
      size_count: 6108
      size_mean: 14.9
      size_quantiles:
      - 6.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,1,487.634,25000,-1.90919,-0.09,-5,194.774




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-20_09-07-03
  done: false
  episode_len_mean: 181.44
  episode_media: {}
  episode_reward_max: 0.47000000000000064
  episode_reward_mean: -1.7834999999999923
  episode_reward_min: -6.8199999999999195
  episodes_this_iter: 64
  episodes_total: 126
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 49475
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 49911
    last_target_update_ts: 17172000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.6480166912078857
        max_q: 12.831802368164062
        mean_q: 5.357852935791016
        min_q: 1.970428705215454
    learner_queue:
      size_count: 17194
      size_mean: 13.28
      size_quantiles:
      - 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,2,1054.68,50000,-1.7835,0.47,-6.82,181.44




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-20_09-15-37
  done: false
  episode_len_mean: 209.48
  episode_media: {}
  episode_reward_max: 1.4900000000000173
  episode_reward_mean: -1.682799999999989
  episode_reward_min: -5.949999999999917
  episodes_this_iter: 57
  episodes_total: 183
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 74795
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 74551
    last_target_update_ts: 27306000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.5590872168540955
        max_q: 13.527922630310059
        mean_q: 5.163458824157715
        min_q: 1.9996591806411743
    learner_queue:
      size_count: 27320
      size_mean: 14.44
      size_quantiles:
      - 4.0
      - 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,3,1568.39,75000,-1.6828,1.49,-5.95,209.48




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 100004
  custom_metrics: {}
  date: 2021-10-20_09-26-02
  done: false
  episode_len_mean: 173.44
  episode_media: {}
  episode_reward_max: 2.7600000000000247
  episode_reward_mean: -0.47079999999999367
  episode_reward_min: -5.949999999999917
  episodes_this_iter: 73
  episodes_total: 256
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 99903
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 99635
    last_target_update_ts: 40002000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.2877110540866852
        max_q: 19.161291122436523
        mean_q: 7.0736284255981445
        min_q: 3.3476362228393555
    learner_queue:
      size_count: 40019
      size_mean: 13.9
      size_quantiles:
      - 2.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,4,2194.02,100004,-0.4708,2.76,-5.95,173.44




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 125004
  custom_metrics: {}
  date: 2021-10-20_09-36-06
  done: false
  episode_len_mean: 181.58
  episode_media: {}
  episode_reward_max: 4.95000000000002
  episode_reward_mean: -0.2559999999999943
  episode_reward_min: -5.50999999999997
  episodes_this_iter: 66
  episodes_total: 322
  evaluation:
    custom_metrics: {}
    episode_len_mean: 500.0
    episode_media: {}
    episode_reward_max: -4.999999999999938
    episode_reward_mean: -4.999999999999938
    episode_reward_min: -4.999999999999938
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 500
      episode_reward:
      - -4.999999999999938
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.040027195821979084
      mean_env_render_ms: 0.0
      mean_env_wait_ms: 79.8522940652813
      mean_inference_ms: 1.4646724312605257
      mean_raw_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,5,2797.73,125004,-0.256,4.95,-5.51,181.58




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 150004
  custom_metrics: {}
  date: 2021-10-20_09-45-32
  done: false
  episode_len_mean: 196.66
  episode_media: {}
  episode_reward_max: 4.95000000000002
  episode_reward_mean: -0.014699999999993931
  episode_reward_min: -4.759999999999965
  episodes_this_iter: 62
  episodes_total: 384
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 149719
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 149839
    last_target_update_ts: 61974000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 1.4692617654800415
        max_q: 19.81983757019043
        mean_q: 6.801989555358887
        min_q: 2.3713719844818115
    learner_queue:
      size_count: 61981
      size_mean: 15.88
      size_quantiles:
      - 13.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,6,3364.09,150004,-0.0147,4.95,-4.76,196.66




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 175004
  custom_metrics: {}
  date: 2021-10-20_09-55-40
  done: false
  episode_len_mean: 171.95
  episode_media: {}
  episode_reward_max: 5.490000000000009
  episode_reward_mean: 2.081000000000017
  episode_reward_min: -4.999999999999938
  episodes_this_iter: 67
  episodes_total: 451
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 174283
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 174839
    last_target_update_ts: 73050000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.40282002091407776
        max_q: 20.576526641845703
        mean_q: 7.125546455383301
        min_q: 3.744353771209717
    learner_queue:
      size_count: 73065
      size_mean: 14.9
      size_quantiles:
      - 6.0
      - 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,7,3971.45,175004,2.081,5.49,-5,171.95




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 200004
  custom_metrics: {}
  date: 2021-10-20_10-05-58
  done: false
  episode_len_mean: 167.59
  episode_media: {}
  episode_reward_max: 5.0000000000000195
  episode_reward_mean: 2.61370000000002
  episode_reward_min: -3.9499999999999593
  episodes_this_iter: 69
  episodes_total: 520
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 199275
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 200003
    last_target_update_ts: 84246000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 1.2842072248458862
        max_q: 23.794164657592773
        mean_q: 8.675408363342285
        min_q: 3.5812556743621826
    learner_queue:
      size_count: 84254
      size_mean: 15.7
      size_quantiles:
      - 11.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,8,4589.24,200004,2.6137,5,-3.95,167.59




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 225004
  custom_metrics: {}
  date: 2021-10-20_10-15-31
  done: false
  episode_len_mean: 193.12
  episode_media: {}
  episode_reward_max: 8.230000000000015
  episode_reward_mean: 2.0901000000000196
  episode_reward_min: -4.999999999999938
  episodes_this_iter: 64
  episodes_total: 584
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 224455
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 224791
    last_target_update_ts: 94650000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.8807483315467834
        max_q: 18.68961524963379
        mean_q: 8.523677825927734
        min_q: 4.418423175811768
    learner_queue:
      size_count: 94660
      size_mean: 15.8
      size_quantiles:
      - 12.0
      - 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,9,5162.57,225004,2.0901,8.23,-5,193.12




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 250004
  custom_metrics: {}
  date: 2021-10-20_10-25-13
  done: false
  episode_len_mean: 197.17
  episode_media: {}
  episode_reward_max: 6.570000000000029
  episode_reward_mean: 1.4387000000000199
  episode_reward_min: -5.649999999999923
  episodes_this_iter: 63
  episodes_total: 647
  evaluation:
    custom_metrics: {}
    episode_len_mean: 132.0
    episode_media: {}
    episode_reward_max: -0.30000000000000066
    episode_reward_mean: -0.30000000000000066
    episode_reward_min: -0.30000000000000066
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 132
      episode_reward:
      - -0.30000000000000066
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.039863360436606744
      mean_env_render_ms: 0.0
      mean_env_wait_ms: 72.31051021654271
      mean_inference_ms: 1.4627160619220463
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,10,5744.7,250004,1.4387,6.57,-5.65,197.17




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 275004
  custom_metrics: {}
  date: 2021-10-20_10-34-15
  done: false
  episode_len_mean: 213.62
  episode_media: {}
  episode_reward_max: 6.8600000000000225
  episode_reward_mean: 1.7794000000000207
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 59
  episodes_total: 706
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 274991
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 274775
    last_target_update_ts: 114966000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.9960803389549255
        max_q: 18.645931243896484
        mean_q: 8.224692344665527
        min_q: 0.47992491722106934
    learner_queue:
      size_count: 114984
      size_mean: 14.44
      size_quantiles:
      - 4.0
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,11,6286.02,275004,1.7794,6.86,-4.67,213.62




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 300004
  custom_metrics: {}
  date: 2021-10-20_10-43-57
  done: false
  episode_len_mean: 209.01
  episode_media: {}
  episode_reward_max: 7.310000000000013
  episode_reward_mean: 1.6739000000000224
  episode_reward_min: -4.999999999999938
  episodes_this_iter: 65
  episodes_total: 771
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 299951
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 299711
    last_target_update_ts: 125544000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 1.6520278453826904
        max_q: 19.133159637451172
        mean_q: 7.748793125152588
        min_q: 4.047493934631348
    learner_queue:
      size_count: 125556
      size_mean: 14.68
      size_quantiles:
      - 5.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,12,6868.03,300004,1.6739,7.31,-5,209.01




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 325008
  custom_metrics: {}
  date: 2021-10-20_10-53-33
  done: false
  episode_len_mean: 202.54
  episode_media: {}
  episode_reward_max: 7.490000000000009
  episode_reward_mean: 2.310500000000023
  episode_reward_min: -4.999999999999938
  episodes_this_iter: 63
  episodes_total: 834
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 324547
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 324819
    last_target_update_ts: 136008000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 1.2368075847625732
        max_q: 17.204023361206055
        mean_q: 7.835657119750977
        min_q: 3.846951961517334
    learner_queue:
      size_count: 136015
      size_mean: 15.8
      size_quantiles:
      - 12.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,13,7444.17,325008,2.3105,7.49,-5,202.54




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 350012
  custom_metrics: {}
  date: 2021-10-20_11-03-35
  done: false
  episode_len_mean: 187.38
  episode_media: {}
  episode_reward_max: 7.490000000000009
  episode_reward_mean: 2.1191000000000217
  episode_reward_min: -4.88999999999994
  episodes_this_iter: 66
  episodes_total: 900
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 349723
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 350003
    last_target_update_ts: 146994000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.45303821563720703
        max_q: 20.518218994140625
        mean_q: 7.792074680328369
        min_q: 4.035220146179199
    learner_queue:
      size_count: 147009
      size_mean: 14.68
      size_quantiles:
      - 5.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,14,8046.46,350012,2.1191,7.49,-4.89,187.38




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 375012
  custom_metrics: {}
  date: 2021-10-20_11-13-04
  done: false
  episode_len_mean: 198.42
  episode_media: {}
  episode_reward_max: 7.270000000000014
  episode_reward_mean: 1.881500000000021
  episode_reward_min: -4.799999999999942
  episodes_this_iter: 61
  episodes_total: 961
  evaluation:
    custom_metrics: {}
    episode_len_mean: 160.0
    episode_media: {}
    episode_reward_max: 3.5300000000000296
    episode_reward_mean: 3.5300000000000296
    episode_reward_min: 3.5300000000000296
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 160
      episode_reward:
      - 3.5300000000000296
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.03981439944023142
      mean_env_render_ms: 0.0
      mean_env_wait_ms: 63.36444683964887
      mean_inference_ms: 1.4559332690052547
      mean_raw_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,15,8615.06,375012,1.8815,7.27,-4.8,198.42




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 400012
  custom_metrics: {}
  date: 2021-10-20_11-22-21
  done: false
  episode_len_mean: 205.39
  episode_media: {}
  episode_reward_max: 7.630000000000026
  episode_reward_mean: 2.7598000000000265
  episode_reward_min: -4.799999999999942
  episodes_this_iter: 62
  episodes_total: 1023
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 399839
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 399739
    last_target_update_ts: 167880000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.7734353542327881
        max_q: 17.750133514404297
        mean_q: 7.266451835632324
        min_q: 3.8521690368652344
    learner_queue:
      size_count: 167883
      size_mean: 2.66
      size_quantiles:
      - 0.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,16,9172.62,400012,2.7598,7.63,-4.8,205.39




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 425016
  custom_metrics: {}
  date: 2021-10-20_11-31-58
  done: false
  episode_len_mean: 200.01
  episode_media: {}
  episode_reward_max: 8.170000000000016
  episode_reward_mean: 2.8746000000000267
  episode_reward_min: -4.9199999999999395
  episodes_this_iter: 64
  episodes_total: 1087
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 424879
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 424987
    last_target_update_ts: 178680000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 1.0546131134033203
        max_q: 18.876495361328125
        mean_q: 7.149586200714111
        min_q: 4.0302934646606445
    learner_queue:
      size_count: 178692
      size_mean: 9.2
      size_quantiles:
      - 0.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,17,9749.76,425016,2.8746,8.17,-4.92,200.01




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 450016
  custom_metrics: {}
  date: 2021-10-20_11-42-23
  done: false
  episode_len_mean: 184.39
  episode_media: {}
  episode_reward_max: 7.3000000000000345
  episode_reward_mean: 2.060000000000019
  episode_reward_min: -2.879999999999984
  episodes_this_iter: 69
  episodes_total: 1156
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 449819
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 449991
    last_target_update_ts: 190356000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.5527015924453735
        max_q: 19.836103439331055
        mean_q: 7.651308536529541
        min_q: 3.1310322284698486
    learner_queue:
      size_count: 190363
      size_mean: 3.22
      size_quantiles:
      - 0.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,18,10373.8,450016,2.06,7.3,-2.88,184.39


[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.
[34m[1mwandb[0m: Network error resolved after 0:01:27.899586, resuming normal operation.


Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 475020
  custom_metrics: {}
  date: 2021-10-20_11-53-33
  done: false
  episode_len_mean: 149.43
  episode_media: {}
  episode_reward_max: 8.050000000000018
  episode_reward_mean: 3.522600000000018
  episode_reward_min: -2.879999999999984
  episodes_this_iter: 75
  episodes_total: 1231
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 474919
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 474899
    last_target_update_ts: 202758000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.822008490562439
        max_q: 20.5135555267334
        mean_q: 7.920034408569336
        min_q: 4.375148773193359
    learner_queue:
      size_count: 202772
      size_mean: 8.62
      size_quantiles:
      - 0.0
      - 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,19,11043.9,475020,3.5226,8.05,-2.88,149.43




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 500020
  custom_metrics: {}
  date: 2021-10-20_12-04-21
  done: false
  episode_len_mean: 168.58
  episode_media: {}
  episode_reward_max: 7.930000000000021
  episode_reward_mean: 3.2698000000000205
  episode_reward_min: -2.389999999999993
  episodes_this_iter: 70
  episodes_total: 1301
  evaluation:
    custom_metrics: {}
    episode_len_mean: 108.0
    episode_media: {}
    episode_reward_max: -1.0800000000000007
    episode_reward_mean: -1.0800000000000007
    episode_reward_min: -1.0800000000000007
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 108
      episode_reward:
      - -1.0800000000000007
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.03960712106855013
      mean_env_render_ms: 0.0
      mean_env_wait_ms: 60.42379650238748
      mean_inference_ms: 1.4531371596121496
      mea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,20,11692.2,500020,3.2698,7.93,-2.39,168.58




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 525028
  custom_metrics: {}
  date: 2021-10-20_12-14-18
  done: false
  episode_len_mean: 175.1
  episode_media: {}
  episode_reward_max: 5.870000000000021
  episode_reward_mean: 2.389400000000018
  episode_reward_min: -2.279999999999954
  episodes_this_iter: 66
  episodes_total: 1367
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 524707
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 524943
    last_target_update_ts: 225666000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.5061358213424683
        max_q: 28.610883712768555
        mean_q: 7.697455883026123
        min_q: 3.032377243041992
    learner_queue:
      size_count: 225680
      size_mean: 5.2
      size_quantiles:
      - 0.0
      - 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,21,12289.4,525028,2.3894,5.87,-2.28,175.1




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 550028
  custom_metrics: {}
  date: 2021-10-20_12-24-12
  done: false
  episode_len_mean: 192.76
  episode_media: {}
  episode_reward_max: 6.320000000000034
  episode_reward_mean: 2.0620000000000207
  episode_reward_min: -3.5299999999999683
  episodes_this_iter: 67
  episodes_total: 1434
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 549923
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 549675
    last_target_update_ts: 236748000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.47206225991249084
        max_q: 26.89636993408203
        mean_q: 8.323863983154297
        min_q: 2.9766685962677
    learner_queue:
      size_count: 236757
      size_mean: 15.28
      size_quantiles:
      - 8.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,22,12883.2,550028,2.062,6.32,-3.53,192.76




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 575028
  custom_metrics: {}
  date: 2021-10-20_12-34-29
  done: false
  episode_len_mean: 180.45
  episode_media: {}
  episode_reward_max: 7.820000000000023
  episode_reward_mean: 2.7562000000000206
  episode_reward_min: -5.439999999999923
  episodes_this_iter: 67
  episodes_total: 1501
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 574979
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 574823
    last_target_update_ts: 248010000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.7458610534667969
        max_q: 32.39504623413086
        mean_q: 8.327836036682129
        min_q: 3.284803867340088
    learner_queue:
      size_count: 248019
      size_mean: 15.28
      size_quantiles:
      - 8.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,23,13500.3,575028,2.7562,7.82,-5.44,180.45




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 600028
  custom_metrics: {}
  date: 2021-10-20_12-45-12
  done: false
  episode_len_mean: 157.51
  episode_media: {}
  episode_reward_max: 7.8600000000000225
  episode_reward_mean: 2.5267000000000173
  episode_reward_min: -3.5699999999999745
  episodes_this_iter: 71
  episodes_total: 1572
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 599767
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 599887
    last_target_update_ts: 260010000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 1.4628368616104126
        max_q: 25.50403594970703
        mean_q: 8.805436134338379
        min_q: 3.9942331314086914
    learner_queue:
      size_count: 260027
      size_mean: 12.58
      size_quantiles:
      - 0.0
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,24,14143.4,600028,2.5267,7.86,-3.57,157.51




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 625028
  custom_metrics: {}
  date: 2021-10-20_12-55-27
  done: false
  episode_len_mean: 175.85
  episode_media: {}
  episode_reward_max: 5.850000000000023
  episode_reward_mean: 2.568800000000019
  episode_reward_min: -4.349999999999984
  episodes_this_iter: 67
  episodes_total: 1639
  evaluation:
    custom_metrics: {}
    episode_len_mean: 116.0
    episode_media: {}
    episode_reward_max: 5.270000000000014
    episode_reward_mean: 5.270000000000014
    episode_reward_min: 5.270000000000014
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 116
      episode_reward:
      - 5.270000000000014
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.03957912516336056
      mean_env_render_ms: 0.0
      mean_env_wait_ms: 58.09801916574423
      mean_inference_ms: 1.4514536280899273
      mean_raw_obs

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,25,14757.8,625028,2.5688,5.85,-4.35,175.85




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 650028
  custom_metrics: {}
  date: 2021-10-20_13-05-32
  done: false
  episode_len_mean: 195.74
  episode_media: {}
  episode_reward_max: 5.940000000000021
  episode_reward_mean: 2.564000000000023
  episode_reward_min: -3.439999999999993
  episodes_this_iter: 66
  episodes_total: 1705
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 649895
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 649735
    last_target_update_ts: 282336000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 1.324009895324707
        max_q: 33.8082389831543
        mean_q: 9.101675987243652
        min_q: 3.467761278152466
    learner_queue:
      size_count: 282345
      size_mean: 15.28
      size_quantiles:
      - 8.0
      - 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,26,15362.7,650028,2.564,5.94,-3.44,195.74




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 675028
  custom_metrics: {}
  date: 2021-10-20_13-16-12
  done: false
  episode_len_mean: 179.27
  episode_media: {}
  episode_reward_max: 6.320000000000013
  episode_reward_mean: 2.7885000000000217
  episode_reward_min: -2.3999999999999835
  episodes_this_iter: 71
  episodes_total: 1776
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 674415
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 674851
    last_target_update_ts: 293880000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.7001587748527527
        max_q: 38.86638641357422
        mean_q: 9.934392929077148
        min_q: 4.782171249389648
    learner_queue:
      size_count: 293892
      size_mean: 14.9
      size_quantiles:
      - 6.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,27,16003.2,675028,2.7885,6.32,-2.4,179.27




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 700028
  custom_metrics: {}
  date: 2021-10-20_13-26-58
  done: false
  episode_len_mean: 171.03
  episode_media: {}
  episode_reward_max: 5.97000000000002
  episode_reward_mean: 3.1793000000000236
  episode_reward_min: -3.2299999999999907
  episodes_this_iter: 73
  episodes_total: 1849
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 698943
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 699971
    last_target_update_ts: 305652000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 1.2548389434814453
        max_q: 56.63157653808594
        mean_q: 9.673287391662598
        min_q: 4.732296943664551
    learner_queue:
      size_count: 305665
      size_mean: 15.28
      size_quantiles:
      - 8.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,28,16649.2,700028,3.1793,5.97,-3.23,171.03




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 725028
  custom_metrics: {}
  date: 2021-10-20_13-37-43
  done: false
  episode_len_mean: 178.14
  episode_media: {}
  episode_reward_max: 5.930000000000021
  episode_reward_mean: 2.5554000000000228
  episode_reward_min: -1.6100000000000012
  episodes_this_iter: 68
  episodes_total: 1917
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 725015
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 724779
    last_target_update_ts: 317256000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 1.9419304132461548
        max_q: 61.5631103515625
        mean_q: 10.556578636169434
        min_q: 4.406432628631592
    learner_queue:
      size_count: 317271
      size_mean: 14.44
      size_quantiles:
      - 4.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,29,17293.5,725028,2.5554,5.93,-1.61,178.14




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 750028
  custom_metrics: {}
  date: 2021-10-20_13-48-21
  done: false
  episode_len_mean: 170.51
  episode_media: {}
  episode_reward_max: 5.950000000000021
  episode_reward_mean: 2.806300000000023
  episode_reward_min: -2.070000000000001
  episodes_this_iter: 67
  episodes_total: 1984
  evaluation:
    custom_metrics: {}
    episode_len_mean: 456.0
    episode_media: {}
    episode_reward_max: 0.5600000000000609
    episode_reward_mean: 0.5600000000000609
    episode_reward_min: 0.5600000000000609
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 456
      episode_reward:
      - 0.5600000000000609
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.039188212791593076
      mean_env_render_ms: 0.0
      mean_env_wait_ms: 42.99895616105359
      mean_inference_ms: 1.45492145825792
      mean_raw_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,30,17932,750028,2.8063,5.95,-2.07,170.51




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 775028
  custom_metrics: {}
  date: 2021-10-20_13-59-00
  done: false
  episode_len_mean: 180.87
  episode_media: {}
  episode_reward_max: 6.210000000000015
  episode_reward_mean: 2.996400000000022
  episode_reward_min: -2.070000000000001
  episodes_this_iter: 71
  episodes_total: 2055
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 774479
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 774847
    last_target_update_ts: 340824000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.5437505841255188
        max_q: 43.6215705871582
        mean_q: 10.890299797058105
        min_q: 3.178647041320801
    learner_queue:
      size_count: 340827
      size_mean: 3.6
      size_quantiles:
      - 0.0
      - 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,31,18571.2,775028,2.9964,6.21,-2.07,180.87




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 800028
  custom_metrics: {}
  date: 2021-10-20_14-10-18
  done: false
  episode_len_mean: 154.4
  episode_media: {}
  episode_reward_max: 8.100000000000017
  episode_reward_mean: 2.8432000000000186
  episode_reward_min: -3.2899999999999814
  episodes_this_iter: 75
  episodes_total: 2130
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 799775
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 799871
    last_target_update_ts: 353466000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.7959231734275818
        max_q: 46.343502044677734
        mean_q: 11.150803565979004
        min_q: 4.781230926513672
    learner_queue:
      size_count: 353471
      size_mean: 4.86
      size_quantiles:
      - 0.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,32,19249,800028,2.8432,8.1,-3.29,154.4




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 825028
  custom_metrics: {}
  date: 2021-10-20_14-20-51
  done: false
  episode_len_mean: 171.97
  episode_media: {}
  episode_reward_max: 8.100000000000017
  episode_reward_mean: 2.567000000000019
  episode_reward_min: -3.2899999999999814
  episodes_this_iter: 69
  episodes_total: 2199
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 824751
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 824703
    last_target_update_ts: 365256000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 0.9530483484268188
        max_q: 48.00981521606445
        mean_q: 11.326390266418457
        min_q: 3.620976686477661
    learner_queue:
      size_count: 365276
      size_mean: 13.14
      size_quantiles:
      - 0.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,33,19882.1,825028,2.567,8.1,-3.29,171.97




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 850028
  custom_metrics: {}
  date: 2021-10-20_14-32-09
  done: false
  episode_len_mean: 161.62
  episode_media: {}
  episode_reward_max: 7.360000000000012
  episode_reward_mean: 2.383500000000016
  episode_reward_min: -3.009999999999959
  episodes_this_iter: 75
  episodes_total: 2274
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 849919
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 849835
    last_target_update_ts: 377760000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 2.3055105209350586
        max_q: 58.27530288696289
        mean_q: 11.255085945129395
        min_q: 2.85054349899292
    learner_queue:
      size_count: 377774
      size_mean: 14.18
      size_quantiles:
      - 3.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,34,20560.1,850028,2.3835,7.36,-3.01,161.62




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 875028
  custom_metrics: {}
  date: 2021-10-20_14-43-42
  done: false
  episode_len_mean: 156.39
  episode_media: {}
  episode_reward_max: 7.680000000000005
  episode_reward_mean: 2.719500000000016
  episode_reward_min: -2.809999999999994
  episodes_this_iter: 72
  episodes_total: 2346
  evaluation:
    custom_metrics: {}
    episode_len_mean: 496.0
    episode_media: {}
    episode_reward_max: -2.949999999999983
    episode_reward_mean: -2.949999999999983
    episode_reward_min: -2.949999999999983
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 496
      episode_reward:
      - -2.949999999999983
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.03886779835528077
      mean_env_render_ms: 0.0
      mean_env_wait_ms: 34.325094256926576
      mean_inference_ms: 1.4524114743412417
      mean_ra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,35,21252.4,875028,2.7195,7.68,-2.81,156.39




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 900028
  custom_metrics: {}
  date: 2021-10-20_14-54-25
  done: false
  episode_len_mean: 161.72
  episode_media: {}
  episode_reward_max: 7.160000000000016
  episode_reward_mean: 2.4632000000000147
  episode_reward_min: -1.8800000000000012
  episodes_this_iter: 71
  episodes_total: 2417
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 899931
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 899595
    last_target_update_ts: 402090000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 2.2668983936309814
        max_q: 61.24565124511719
        mean_q: 10.856223106384277
        min_q: 3.101163625717163
    learner_queue:
      size_count: 402105
      size_mean: 15.1
      size_quantiles:
      - 7.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,36,21895.7,900028,2.4632,7.16,-1.88,161.72




Result for APEX_my_env_9a6b4_00000:
  agent_timesteps_total: 925028
  custom_metrics: {}
  date: 2021-10-20_15-05-27
  done: false
  episode_len_mean: 158.88
  episode_media: {}
  episode_reward_max: 7.060000000000018
  episode_reward_mean: 2.8365000000000147
  episode_reward_min: -2.3099999999999743
  episodes_this_iter: 72
  episodes_total: 2489
  experiment_id: 6c20111047d54658a08015ab604d7e9f
  hostname: linar-Z390-GAMING-X
  info:
    exploration_infos:
    - cur_epsilon: 0.0
      last_timestep: 0
    - cur_epsilon: 0.4
      last_timestep: 924419
    - cur_epsilon: 0.0006553600000000003
      last_timestep: 924787
    last_target_update_ts: 414144000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 1.0e-05
        grad_gnorm: 1.6672449111938477
        max_q: 38.29267120361328
        mean_q: 11.324572563171387
        min_q: 3.3063716888427734
    learner_queue:
      size_count: 414160
      size_mean: 14.68
      size_quantiles:
      - 5.0
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
APEX_my_env_9a6b4_00000,RUNNING,192.168.3.5:162,37,22557.4,925028,2.8365,7.06,-2.31,158.88


