# Create Environment

## Import 

In [1]:
import itertools
import pandas as pd
import geopandas as geopd

import numpy as np
from sklearn.model_selection import train_test_split

import gym
from gym.wrappers import TimeLimit
# import time

import warnings
warnings.filterwarnings('ignore')


In [2]:
import ray
# import tune
from ray import tune
from ray.rllib.agents import ppo, a3c, dqn
from ray.tune.logger import pretty_print
from ray.tune.registry import register_env


In [3]:
ray.init()


{'node_ip_address': '127.0.0.1',
 'raylet_ip_address': '127.0.0.1',
 'redis_address': '127.0.0.1:45117',
 'object_store_address': '/tmp/ray/session_2022-02-21_12-47-59_948171_25330/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2022-02-21_12-47-59_948171_25330/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2022-02-21_12-47-59_948171_25330',
 'metrics_export_port': 63412,
 'gcs_address': '127.0.0.1:56266',
 'node_id': 'd553d051afe8bc323e6af46ae0ac10e76e3571942174f1b8d87b7ffb'}

## 1. Get the data

In [4]:
## import data
nb_of_data = 'Manhattan'

data = pd.read_csv(f'./Data/demand_{nb_of_data}.csv')
distance_data = pd.read_csv(f'./Data/distance_info_{nb_of_data}.csv')
# distance_data = geopd.GeoDataFrame(pd.read_csv(f'./Data/distance_info_{nb_of_data}.csv'))


In [5]:
# Env params:
max_episode_steps = 300
max_client = 30

env_config = {'demand_data': data,
              'distance_data': distance_data,
              'client_limit': max_client,
              'customers_per_taxi': 3,
              'max_timestep': 250}
del data, distance_data


Add pick up action
multiple customer in taxi instead of 1 (car sharing idea)
Drop odd action????
add time instead of limited number of customers

## 2. Define the environment

In [6]:
class Multi_passanger_taxi(gym.Env):
    def __init__(self, env_config):
        # Get data from input
        self.data = env_config['demand_data']
        self.distance_data = env_config['distance_data']
        self.max_timestep = env_config['max_timestep']
        # max number of customer per taxi
        self.n = env_config['customers_per_taxi']

        self.locations = np.unique(self.distance_data.PULocationID.values)
        self.nb_locations = len(self.locations)

        self.locations = dict(zip(range(self.nb_locations), self.locations))
        self.location_to_index = dict((v, k)
                                      for k, v in self.locations.items())

        self.customers = 0
        self.custumer_count = self.data.Demand.sum()
        self.customers_limit = min(
            self.custumer_count, env_config['client_limit'])
        self.current_client_count = 0

        self.action_space = gym.spaces.Dict({'move': gym.spaces.Discrete(self.nb_locations),
                                            'pickup': gym.spaces.Discrete(2)})

        # Define Observation space
        spaces = {'position': gym.spaces.Box(low=np.array([0]), high=np.array([self.nb_locations-1]), dtype=int),
                  'state': gym.spaces.Box(low=-1, high=self.nb_locations-1, shape=(self.n,), dtype=int), }

        self.observation_space = gym.spaces.Dict(spaces)
        # ## Initialise the space
        self.reset()

    def get_distane(self, PU_location, DO_location):
        distance = self.distance_data[(self.distance_data.PULocationID == PU_location) &
                                      (self.distance_data.DOLocationID == DO_location)]['distance'].values
        return distance[0]

    def step(self, action):
        # print('*********************************next step*********************************')
        done = False
        reward = 0
        move_action = action['move']
        pick_up_action = action['pickup']
        taxi_location = self.locations[self.state['position'][0]]
        taxi_state = self.state['state']
        # print(f'pick up action is {pick_up_action}')
        if pick_up_action == 1:  # If action is pick up client:
            # print(taxi_state.values())
            if -1 not in taxi_state:  # taxi is full
                # print(f'We are trying to pick up but full taxi {taxi_state}')
                reward -= 100
                # print(self.state)
            else:  # taxi has an empty spot
                # print('We are trying to pick up someone')
                for i in range(self.n):
                    if taxi_state[i] == -1:
                        if taxi_location in self.demand_dict.keys():  # Current location has a client:
                            # print(f'Pick up client in position {i}')
                            self.current_client_count += 1
                            destination_location = self.demand_dict[taxi_location].pop(
                                0)
                            self.state['state'][i] = self.location_to_index[destination_location]
                            if self.demand_dict[taxi_location] == []:
                                self.demand_dict.pop(taxi_location)
                            reward += 10
                            break

        action_location = self.locations[move_action]
        if action_location in taxi_state:  # if action is going to the client destination
            destination_indexes, = np.where(taxi_state == action_location)
            # print(f'Dropped off client {destination_indexes}')
            # print('npwhere res',destination_indexes)
            for destination_index in destination_indexes:
                # print('single index', destination_index)
                # print('chennnn')
                self.customers += 1
                self.current_client_count -= 1
                self.state['state'][destination_index] = -1
                reward += self.get_distane(taxi_location, action_location)*100
        else:  # We have a client but we are not going to his destination
            if self.current_client_count == 0:
                # print(f'Wrong location with neg rew {taxi_state}')
                reward += -self.get_distane(taxi_location, action_location)*10
            # else:
                # print(f'Wrong location but no client {taxi_state}')

        self.taxi_path.append(move_action)
        self.state['position'][0] = move_action
        self.time_step += 1

        if self.customers == self.customers_limit or len(self.demand_dict) == 0:
            done = True

        # Set placeholder for info
        info = {'taxi location & state': self.state,
                'nb of satistied customers ': self.customers,
                'number of steps': self.time_step
                }

        # Return step information
        return self.state, reward, done, info

    def render(self):
        # Implement viz
        pass

    def reset(self):
        self.time_step = 0
        self.taxi_path = []
        self.state = self.observation_space.sample()
        self.state['state'] = -np.ones(self.n)
        # print(self.state)
        self.demand_dict = {pu: [] for pu in self.data.PU_LocationID}
        for index in range(len(self.data)):
            row_index = self.data.loc[index]
            # print(row_index.PU_LocationID)
            self.demand_dict[row_index.PU_LocationID] += [
                row_index.DO_LocationID for _ in range(row_index.Demand)]
        self.customers = 0
        return self.state


In [7]:
class Single_passanger_taxi(gym.Env):
    def __init__(self, env_config):
        # Get data from input
        self.data = env_config['demand_data']
        self.distance_data = env_config['distance_data']
        self.max_timestep = env_config['max_timestep']

        self.locations = np.unique(self.distance_data.PULocationID.values)
        self.nb_locations = len(self.locations)

        self.locations = dict(zip(range(self.nb_locations), self.locations))
        self.location_to_index = dict((v, k)
                                      for k, v in self.locations.items())

        self.customers = 0
        self.custumer_count = self.data.Demand.sum()
        self.customers_limit = min(
            self.custumer_count, env_config['client_limit'])
        # Define Actions we can
        self.action_space = gym.spaces.Discrete(self.nb_locations*2)

        # Define Observation space
        self.observation_space = gym.spaces.Box(low=np.array([0, -1]),
                                                high=np.array([self.nb_locations-1, self.nb_locations-1]), dtype=int)  # TODO: maybe repeated info here ????

        # ## Initialise the space
        self.reset()

    def get_distane(self, PU_location, DO_location):
        distance = self.distance_data[(self.distance_data.PULocationID == PU_location) &
                                      (self.distance_data.DOLocationID == DO_location)]['distance'].values
        return distance[0]

    def discretize_action_space(self, action):
        list_action = [(i, j) for i in range(self.nb_locations)
                       for j in [0, 1]]
        dict_action = dict(zip(range(2*self.nb_locations), list_action))
        return dict_action[action]

    def pick_up_action(self):
        taxi_location = self.locations[self.state[0]]
        if self.state[1] == -1:  # Check if taxi is empty
            if taxi_location in self.demand_dict.keys():  # Current location has a client:
                destination_location = self.demand_dict[taxi_location].pop(0)
                self.state[1] = self.location_to_index[destination_location]
                if self.demand_dict[taxi_location] == []:
                    self.demand_dict.pop(taxi_location)
                return 10
            else:  # Taxi empty and location has no client:
                return 0
        else:  # Taxi has a client so it's impossible action to pick up one client
            return -100

    def move_action(self, action):
        action_location = self.locations[action]
        taxi_location = self.locations[self.state[0]]
        if self.state[1] == -1:  # Empty taxi
            # return -self.get_distane(taxi_location,action_location)
            return 0
        else:  # taxi already has a client
            taxi_destination = self.locations[self.state[1]]
            # if action is going to the client destination
            if action == self.state[1]:
                self.customers += 1
                self.state[1] = -1
                return self.get_distane(taxi_location, action_location)*100
            else:  # We have a client but we are not going to his destination
                return -self.get_distane(taxi_location, action_location)*10

    def step(self, action):

        action = self.discretize_action_space(action)
        # print(self.state)
        done = False
        reward = 0
        go_action = action[0]
        pick_up_action = action[1]

        if pick_up_action == 1:  # If action is pick up client:
            reward += self.pick_up_action()
        reward = self.move_action(go_action)

        self.taxi_path.append(action[0])
        self.state[0] = action[0]
        self.time_step += 1

        if self.customers == self.customers_limit or len(self.demand_dict) == 0:
            done = True

        # Set placeholder for info
        info = {}

        # Return step information
        return self.state, reward, done, info

    def render(self):
        # Implement viz
        pass

    def reset(self):
        self.time_step = 0
        self.taxi_path = []
        self.state = self.observation_space.sample()
        # self.state[2]=0
        self.state[1] = -1
        self.demand_dict = {pu: [] for pu in self.data.PU_LocationID}
        for index in range(len(self.data)):
            row_index = self.data.loc[index]
            # print(row_index.PU_LocationID)
            self.demand_dict[row_index.PU_LocationID] += [
                row_index.DO_LocationID for _ in range(row_index.Demand)]
        self.customers = 0
        return self.state


In [8]:
# Registering the env for easier use.
def env_multi_creator(env_config):
    # return an env instance
    return TimeLimit(Multi_passanger_taxi(env_config), env_config['max_timestep'])


register_env("Multi_passanger_env", env_multi_creator)


In [9]:
def env_single_reator(env_config):
    # return an env instance
    return TimeLimit(Single_passanger_taxi(env_config),env_config['max_timestep'])


register_env("Single_passanger_env", env_single_reator)


## Tune Grid_search training

Returns an ExperimentAnalysis object that allows further analysis of the training results and retrieving the checkpoint(s) of the trained agent. It also simplifies saving the trained agent.

In [10]:
chosen_env = 'Multi_passanger_env'
# 0.005, 0.0001, 0.0005

In [15]:
config = {
    "env": chosen_env,
    'env_config': env_config,
    'framework': 'torch',
    'preprocessor_pref': 'deepmind',"vf_clip_param": 400,# t'log_level': 'INFO',
    "lr": tune.grid_search([0.005, 0.0001, 0.0005]),
}
# trainer_config['env_config']=env_config
# trainer_config['framework']='torch'
stop_criteria = {"episode_reward_max": 0,
    "agent_timesteps_total": 400000}


In [16]:
# tune.run() allows setting a custom log directory (other than ``~/ray-results``)
# and automatically saving the trained agent
analysis = ray.tune.run(
    ppo.PPOTrainer,
    config=config,
    # local_dir=log_dir,
    stop=stop_criteria,
    checkpoint_at_end=True)


Trial name,status,loc,lr
PPOTrainer_Multi_passanger_env_29773_00000,PENDING,,0.005
PPOTrainer_Multi_passanger_env_29773_00001,PENDING,,0.0001
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005


[2m[36m(PPOTrainer pid=25400)[0m 2022-02-21 12:52:25,638	INFO ppo.py:249 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=25400)[0m 2022-02-21 12:52:25,639	INFO trainer.py:790 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(PPOTrainer pid=25399)[0m 2022-02-21 12:52:25,638	INFO ppo.py:249 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=25399)[0m 2022-02-21 12:52:25,638	INFO trainer.py:790 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.

Trial name,status,loc,lr
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005





Trial name,status,loc,lr
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005


Trial name,status,loc,lr
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005


Trial name,status,loc,lr
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005


Trial name,status,loc,lr
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005


Trial name,status,loc,lr
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005


Trial name,status,loc,lr
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2022-02-21_12-53-04
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -10470.0
  episode_reward_mean: -12192.708364420352
  episode_reward_min: -13670.0
  episodes_this_iter: 16
  episodes_total: 16
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.20000000000000004
          cur_lr: 0.005
          entropy: 4.509699287465824
          entropy_coeff: 0.0
          kl: 0.5131268975525812
          policy_loss: 0.16676502465570886
          total_loss: 5571682.215860215
          vf_explained_var: -7.89729497765982e-07
          vf_loss: 5571681.974059139
        model: {}
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sample

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,1.0,31.5911,4000.0,-12192.7,-10470.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,1.0,30.6398,4000.0,-12037.3,-10639.3,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,1.0,31.5911,4000.0,-12192.7,-10470.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,1.0,30.6398,4000.0,-12037.3,-10639.3,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,1.0,31.5911,4000.0,-12192.7,-10470.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,1.0,30.6398,4000.0,-12037.3,-10639.3,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,1.0,31.5911,4000.0,-12192.7,-10470.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,1.0,30.6398,4000.0,-12037.3,-10639.3,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,1.0,31.5911,4000.0,-12192.7,-10470.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,1.0,30.6398,4000.0,-12037.3,-10639.3,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,1.0,31.5911,4000.0,-12192.7,-10470.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,1.0,30.6398,4000.0,-12037.3,-10639.3,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,1.0,31.5911,4000.0,-12192.7,-10470.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,1.0,30.6398,4000.0,-12037.3,-10639.3,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2022-02-21_12-53-40
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -5370.0
  episode_reward_mean: -9471.723243800394
  episode_reward_min: -13670.0
  episodes_this_iter: 16
  episodes_total: 32
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.3
          cur_lr: 0.005
          entropy: 4.263764986940609
          entropy_coeff: 0.0
          kl: 0.287660338967862
          policy_loss: 0.07085422966378911
          total_loss: 934505.6132056451
          vf_explained_var: -0.0010916431744893392
          vf_loss: 934505.4572580645
        model: {}
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_st

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,2.0,67.0488,8000.0,-9471.72,-5370.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,2.0,66.1739,8000.0,-11779.7,-10070.0,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,2.0,67.0488,8000.0,-9471.72,-5370.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,2.0,66.1739,8000.0,-11779.7,-10070.0,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,2.0,67.0488,8000.0,-9471.72,-5370.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,2.0,66.1739,8000.0,-11779.7,-10070.0,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,2.0,67.0488,8000.0,-9471.72,-5370.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,2.0,66.1739,8000.0,-11779.7,-10070.0,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,2.0,67.0488,8000.0,-9471.72,-5370.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,2.0,66.1739,8000.0,-11779.7,-10070.0,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,2.0,67.0488,8000.0,-9471.72,-5370.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,2.0,66.1739,8000.0,-11779.7,-10070.0,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,2.0,67.0488,8000.0,-9471.72,-5370.0,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,2.0,66.1739,8000.0,-11779.7,-10070.0,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2022-02-21_12-54-14
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -2952.094241449552
  episode_reward_mean: -7595.465797387146
  episode_reward_min: -13670.0
  episodes_this_iter: 16
  episodes_total: 48
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 0.005
          entropy: 3.943615015860527
          entropy_coeff: 0.0
          kl: 0.2521015409535078
          policy_loss: 0.044452342501170534
          total_loss: 225813.4232610887
          vf_explained_var: -1.1334624341739121e-05
          vf_loss: 225813.26581821236
        model: {}
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    n

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,3.0,101.245,12000.0,-7595.47,-2952.09,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,3.0,100.463,12000.0,-11744.2,-9947.05,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,3.0,101.245,12000.0,-7595.47,-2952.09,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,3.0,100.463,12000.0,-11744.2,-9947.05,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,3.0,101.245,12000.0,-7595.47,-2952.09,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,3.0,100.463,12000.0,-11744.2,-9947.05,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2022-02-21_12-54-34
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -1250.2966158557394
  episode_reward_mean: -6345.582783524387
  episode_reward_min: -13670.0
  episodes_this_iter: 16
  episodes_total: 64
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.005
          entropy: 3.6755725229940106
          entropy_coeff: 0.0
          kl: 0.16528816764737025
          policy_loss: 0.03531320875270232
          total_loss: 139010.62354145665
          vf_explained_var: -0.02114675634650774
          vf_loss: 139010.47626008064
        model: {}
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    n

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,4.0,121.529,16000.0,-6345.58,-1250.3,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,4.0,120.675,16000.0,-11452.8,-8870.0,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,4.0,121.529,16000.0,-6345.58,-1250.3,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,4.0,120.675,16000.0,-11452.8,-8870.0,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,4.0,121.529,16000.0,-6345.58,-1250.3,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,4.0,120.675,16000.0,-11452.8,-8870.0,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,4.0,121.529,16000.0,-6345.58,-1250.3,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,4.0,120.675,16000.0,-11452.8,-8870.0,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2022-02-21_12-54-53
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -1250.2966158557394
  episode_reward_mean: -5625.665788021365
  episode_reward_min: -13670.0
  episodes_this_iter: 16
  episodes_total: 80
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0124999999999997
          cur_lr: 0.005
          entropy: 3.603085441999538
          entropy_coeff: 0.0
          kl: 0.1270938138697329
          policy_loss: -0.004893799897243259
          total_loss: 128450.16984627016
          vf_explained_var: -0.014095916030227498
          vf_loss: 128450.04635416667
        model: {}
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,5.0,140.248,20000.0,-5625.67,-1250.3,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,5.0,139.506,20000.0,-11236.1,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,5.0,140.248,20000.0,-5625.67,-1250.3,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,5.0,139.506,20000.0,-11236.1,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,5.0,140.248,20000.0,-5625.67,-1250.3,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,5.0,139.506,20000.0,-11236.1,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2022-02-21_12-55-12
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -956.6302223693804
  episode_reward_mean: -4975.616738585938
  episode_reward_min: -13670.0
  episodes_this_iter: 16
  episodes_total: 96
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.005
          entropy: 3.4507761921933904
          entropy_coeff: 0.0
          kl: 0.10297681945111513
          policy_loss: -0.008705849391258052
          total_loss: 89427.95733891969
          vf_explained_var: -0.0035047999633255823
          vf_loss: 89427.80980395245
        model: {}
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,6.0,159.388,24000.0,-4975.62,-956.63,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,5.0,139.506,20000.0,-11236.1,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2022-02-21_12-55-14
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -8555.372748330037
  episode_reward_mean: -11098.931056435904
  episode_reward_min: -13570.0
  episodes_this_iter: 16
  episodes_total: 96
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 0.00010000000000000003
          entropy: 4.77091204632995
          entropy_coeff: 0.0
          kl: 0.01804186671059398
          policy_loss: -0.04361919370449839
          total_loss: 4924168.556586022
          vf_explained_var: 4.9874539016395486e-05
          vf_loss: 4924168.611827957
        model: {}
    num_agent_steps_sampled: 24000
    num_agent_steps_trai

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,6.0,159.388,24000.0,-4975.62,-956.63,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,6.0,159.251,24000.0,-11098.9,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,6.0,159.388,24000.0,-4975.62,-956.63,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,6.0,159.251,24000.0,-11098.9,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,6.0,159.388,24000.0,-4975.62,-956.63,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,6.0,159.251,24000.0,-11098.9,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,6.0,159.388,24000.0,-4975.62,-956.63,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,6.0,159.251,24000.0,-11098.9,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,6.0,159.388,24000.0,-4975.62,-956.63,-13670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,6.0,159.251,24000.0,-11098.9,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2022-02-21_12-55-43
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -349.2892864396639
  episode_reward_mean: -3476.231353236826
  episode_reward_min: -12256.63022236938
  episodes_this_iter: 16
  episodes_total: 112
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 2.2781249999999993
          cur_lr: 0.005
          entropy: 3.348922892539732
          entropy_coeff: 0.0
          kl: 0.10132652328129997
          policy_loss: 0.005075243666707988
          total_loss: 46744.452233230426
          vf_explained_var: 0.012876397691747194
          vf_loss: 46744.216284442206
        model: {}
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 2

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,7.0,189.601,28000.0,-3476.23,-349.289,-12256.6,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,7.0,190.418,28000.0,-10900.5,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,7.0,189.601,28000.0,-3476.23,-349.289,-12256.6,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,7.0,190.418,28000.0,-10900.5,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,7.0,189.601,28000.0,-3476.23,-349.289,-12256.6,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,7.0,190.418,28000.0,-10900.5,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,7.0,189.601,28000.0,-3476.23,-349.289,-12256.6,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,7.0,190.418,28000.0,-10900.5,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,7.0,189.601,28000.0,-3476.23,-349.289,-12256.6,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,7.0,190.418,28000.0,-10900.5,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,7.0,189.601,28000.0,-3476.23,-349.289,-12256.6,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,7.0,190.418,28000.0,-10900.5,-8555.37,-13570.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2022-02-21_12-56-13
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -253.95809349259522
  episode_reward_mean: -2379.067364416835
  episode_reward_min: -7370.0
  episodes_this_iter: 16
  episodes_total: 128
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 3.417187499999999
          cur_lr: 0.005
          entropy: 3.3791114073927684
          entropy_coeff: 0.0
          kl: 0.0415554177029946
          policy_loss: -0.02480083611062778
          total_loss: 36756.04177298597
          vf_explained_var: 0.010881775233053392
          vf_loss: 36755.92471359417
        model: {}
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,8.0,220.022,32000.0,-2379.07,-253.958,-7370.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,8.0,219.844,32000.0,-10740.1,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,8.0,220.022,32000.0,-2379.07,-253.958,-7370.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,8.0,219.844,32000.0,-10740.1,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,8.0,220.022,32000.0,-2379.07,-253.958,-7370.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,8.0,219.844,32000.0,-10740.1,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,8.0,220.022,32000.0,-2379.07,-253.958,-7370.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,8.0,219.844,32000.0,-10740.1,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,8.0,220.022,32000.0,-2379.07,-253.958,-7370.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,8.0,219.844,32000.0,-10740.1,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2022-02-21_12-56-42
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -253.95809349259522
  episode_reward_mean: -1900.7104985534947
  episode_reward_min: -4946.848124948364
  episodes_this_iter: 16
  episodes_total: 144
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 5.12578125
          cur_lr: 0.005
          entropy: 3.3664030446801134
          entropy_coeff: 0.0
          kl: 0.04015191412220114
          policy_loss: -0.013284223207262575
          total_loss: 38227.390225974465
          vf_explained_var: 0.01620130801713595
          vf_loss: 38227.197791971186
        model: {}
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,9.0,248.425,36000.0,-1900.71,-253.958,-4946.85,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,8.0,219.844,32000.0,-10740.1,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2022-02-21_12-56-43
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -8344.607295599839
  episode_reward_mean: -10587.967055504534
  episode_reward_min: -13870.0
  episodes_this_iter: 16
  episodes_total: 144
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 0.00010000000000000003
          entropy: 4.700768715848205
          entropy_coeff: 0.0
          kl: 0.020276589340805506
          policy_loss: -0.05337717331625441
          total_loss: 4845830.807258065
          vf_explained_var: -1.0445105132236276e-05
          vf_loss: 4845830.853763441
        model: {}
    num_agent_steps_sampled: 36000
    num_agent_steps_

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,9.0,248.425,36000.0,-1900.71,-253.958,-4946.85,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,9.0,248.43,36000.0,-10588.0,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,9.0,248.425,36000.0,-1900.71,-253.958,-4946.85,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,9.0,248.43,36000.0,-10588.0,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,9.0,248.425,36000.0,-1900.71,-253.958,-4946.85,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,9.0,248.43,36000.0,-10588.0,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,9.0,248.425,36000.0,-1900.71,-253.958,-4946.85,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,9.0,248.43,36000.0,-10588.0,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2022-02-21_12-57-04
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -253.95809349259522
  episode_reward_mean: -1599.6279337216433
  episode_reward_min: -4946.848124948364
  episodes_this_iter: 16
  episodes_total: 160
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 7.688671874999998
          cur_lr: 0.005
          entropy: 3.3377984077699723
          entropy_coeff: 0.0
          kl: 0.03751646713095449
          policy_loss: -0.005307459044120004
          total_loss: 32240.091860666584
          vf_explained_var: 0.004297789386523667
          vf_loss: 32239.80890456989
        model: {}
    num_agent_steps_sampled: 40000
    num_agent_steps_trained:

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,10.0,270.894,40000.0,-1599.63,-253.958,-4946.85,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,10.0,270.767,40000.0,-10435.5,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,10.0,270.894,40000.0,-1599.63,-253.958,-4946.85,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,10.0,270.767,40000.0,-10435.5,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,10.0,270.894,40000.0,-1599.63,-253.958,-4946.85,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,10.0,270.767,40000.0,-10435.5,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,10.0,270.894,40000.0,-1599.63,-253.958,-4946.85,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,10.0,270.767,40000.0,-10435.5,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2022-02-21_12-57-29
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -253.95809349259522
  episode_reward_mean: -1365.7442464160058
  episode_reward_min: -3444.1635092007823
  episodes_this_iter: 16
  episodes_total: 176
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 11.533007812499998
          cur_lr: 0.005
          entropy: 3.249197576891991
          entropy_coeff: 0.0
          kl: 0.023640642612161106
          policy_loss: -0.02129250214164776
          total_loss: 30311.418630817374
          vf_explained_var: 0.010068357183087257
          vf_loss: 30311.167421664988
        model: {}
    num_agent_steps_sampled: 44000
    num_agent_steps_traine

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,11.0,295.542,44000.0,-1365.74,-253.958,-3444.16,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,11.0,295.366,44000.0,-10286.4,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,11.0,295.542,44000.0,-1365.74,-253.958,-3444.16,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,11.0,295.366,44000.0,-10286.4,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,11.0,295.542,44000.0,-1365.74,-253.958,-3444.16,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,11.0,295.366,44000.0,-10286.4,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,11.0,295.542,44000.0,-1365.74,-253.958,-3444.16,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,11.0,295.366,44000.0,-10286.4,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,11.0,295.542,44000.0,-1365.74,-253.958,-3444.16,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,11.0,295.366,44000.0,-10286.4,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2022-02-21_12-57-55
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -253.95809349259522
  episode_reward_mean: -1265.9353505662934
  episode_reward_min: -3444.1635092007823
  episodes_this_iter: 16
  episodes_total: 192
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 17.299511718750004
          cur_lr: 0.005
          entropy: 3.2933676391519526
          entropy_coeff: 0.0
          kl: 0.034631548071704855
          policy_loss: -0.00611089304670371
          total_loss: 42043.25298481603
          vf_explained_var: 0.030881538121931014
          vf_loss: 42042.65987535702
        model: {}
    num_agent_steps_sampled: 48000
    num_agent_steps_trained

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,12.0,321.172,48000.0,-1265.94,-253.958,-3444.16,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,12.0,320.763,48000.0,-10173.1,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,12.0,321.172,48000.0,-1265.94,-253.958,-3444.16,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,12.0,320.763,48000.0,-10173.1,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,12.0,321.172,48000.0,-1265.94,-253.958,-3444.16,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,12.0,320.763,48000.0,-10173.1,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,12.0,321.172,48000.0,-1265.94,-253.958,-3444.16,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,12.0,320.763,48000.0,-10173.1,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,12.0,321.172,48000.0,-1265.94,-253.958,-3444.16,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,12.0,320.763,48000.0,-10173.1,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,12.0,321.172,48000.0,-1265.94,-253.958,-3444.16,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,12.0,320.763,48000.0,-10173.1,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2022-02-21_12-58-24
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -253.95809349259522
  episode_reward_mean: -1230.3892568176534
  episode_reward_min: -2670.0
  episodes_this_iter: 16
  episodes_total: 208
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 25.949267578124992
          cur_lr: 0.005
          entropy: 3.3175541682909895
          entropy_coeff: 0.0
          kl: 0.019767816698618882
          policy_loss: -0.0013753713338926273
          total_loss: 27583.26017751386
          vf_explained_var: 0.050596970127474876
          vf_loss: 27582.748538043936
        model: {}
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
 

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,13.0,350.283,52000.0,-1230.39,-253.958,-2670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,13.0,350.716,52000.0,-10026.5,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,13.0,350.283,52000.0,-1230.39,-253.958,-2670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,13.0,350.716,52000.0,-10026.5,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,13.0,350.283,52000.0,-1230.39,-253.958,-2670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,13.0,350.716,52000.0,-10026.5,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,13.0,350.283,52000.0,-1230.39,-253.958,-2670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,13.0,350.716,52000.0,-10026.5,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,13.0,350.283,52000.0,-1230.39,-253.958,-2670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,13.0,350.716,52000.0,-10026.5,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2022-02-21_12-58-54
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -253.95809349259522
  episode_reward_mean: -1210.28671740885
  episode_reward_min: -2670.0
  episodes_this_iter: 16
  episodes_total: 224
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 25.949267578124992
          cur_lr: 0.005
          entropy: 3.3398222213150355
          entropy_coeff: 0.0
          kl: 0.02390750870791082
          policy_loss: -0.0002910488936048682
          total_loss: 34899.806644037715
          vf_explained_var: -0.07890389119425127
          vf_loss: 34899.186526062666
        model: {}
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
   

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,14.0,380.756,56000.0,-1210.29,-253.958,-2670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,13.0,350.716,52000.0,-10026.5,-8344.61,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2022-02-21_12-58-56
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -7970.0
  episode_reward_mean: -9822.743309171388
  episode_reward_min: -13870.0
  episodes_this_iter: 16
  episodes_total: 224
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.628736545706308
          entropy_coeff: 0.0
          kl: 0.016586330705136362
          policy_loss: -0.03840035556224725
          total_loss: 3368658.907123656
          vf_explained_var: -3.064191469582178e-06
          vf_loss: 3368658.936827957
        model: {}
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 5600

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,14.0,380.756,56000.0,-1210.29,-253.958,-2670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,14.0,380.705,56000.0,-9822.74,-7970.0,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,14.0,380.756,56000.0,-1210.29,-253.958,-2670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,14.0,380.705,56000.0,-9822.74,-7970.0,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,14.0,380.756,56000.0,-1210.29,-253.958,-2670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,14.0,380.705,56000.0,-9822.74,-7970.0,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,14.0,380.756,56000.0,-1210.29,-253.958,-2670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,14.0,380.705,56000.0,-9822.74,-7970.0,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,14.0,380.756,56000.0,-1210.29,-253.958,-2670.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,14.0,380.705,56000.0,-9822.74,-7970.0,-13870.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2022-02-21_12-59-25
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -347.67971957827206
  episode_reward_mean: -1137.4243910865268
  episode_reward_min: -2470.0
  episodes_this_iter: 16
  episodes_total: 240
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 38.9239013671875
          cur_lr: 0.005
          entropy: 3.3223235607147217
          entropy_coeff: 0.0
          kl: 0.008656164745437513
          policy_loss: -0.012550382133853693
          total_loss: 23130.84119203629
          vf_explained_var: -0.04932070150170275
          vf_loss: 23130.51669265583
        model: {}
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    n

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,15.0,411.714,60000.0,-1137.42,-347.68,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,15.0,411.186,60000.0,-9593.18,-7970.0,-12370.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,15.0,411.714,60000.0,-1137.42,-347.68,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,15.0,411.186,60000.0,-9593.18,-7970.0,-12370.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,15.0,411.714,60000.0,-1137.42,-347.68,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,15.0,411.186,60000.0,-9593.18,-7970.0,-12370.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,15.0,411.714,60000.0,-1137.42,-347.68,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,15.0,411.186,60000.0,-9593.18,-7970.0,-12370.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,15.0,411.714,60000.0,-1137.42,-347.68,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,15.0,411.186,60000.0,-9593.18,-7970.0,-12370.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,15.0,411.714,60000.0,-1137.42,-347.68,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,15.0,411.186,60000.0,-9593.18,-7970.0,-12370.0,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2022-02-21_12-59-54
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -270.0
  episode_reward_mean: -1087.0593067999148
  episode_reward_min: -2470.0
  episodes_this_iter: 16
  episodes_total: 256
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 38.9239013671875
          cur_lr: 0.005
          entropy: 3.291378976196371
          entropy_coeff: 0.0
          kl: 0.009283811310968308
          policy_loss: -0.007338651175540622
          total_loss: 28412.91352985341
          vf_explained_var: -0.01859797264939995
          vf_loss: 28412.55932433426
        model: {}
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampl

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,16.0,440.603,64000.0,-1087.06,-270.0,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,16.0,439.869,64000.0,-9421.98,-7370.0,-11952.4,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,16.0,440.603,64000.0,-1087.06,-270.0,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,16.0,439.869,64000.0,-9421.98,-7370.0,-11952.4,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,16.0,440.603,64000.0,-1087.06,-270.0,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,16.0,439.869,64000.0,-9421.98,-7370.0,-11952.4,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,16.0,440.603,64000.0,-1087.06,-270.0,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,16.0,439.869,64000.0,-9421.98,-7370.0,-11952.4,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2022-02-21_13-00-16
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -270.0
  episode_reward_mean: -1043.369892015105
  episode_reward_min: -2470.0
  episodes_this_iter: 16
  episodes_total: 272
  experiment_id: 71b777217cba43409ead8cde7cddc931
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 38.9239013671875
          cur_lr: 0.005
          entropy: 3.3167045772716564
          entropy_coeff: 0.0
          kl: 0.012143132814301152
          policy_loss: -0.0024189760247545857
          total_loss: 28018.175962517853
          vf_explained_var: 0.027320476949855847
          vf_loss: 28017.705592264156
        model: {}
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sa

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,17.0,461.818,68000.0,-1043.37,-270.0,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,17.0,460.445,68000.0,-9239.9,-7070.0,-11557.3,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,17.0,461.818,68000.0,-1043.37,-270.0,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,17.0,460.445,68000.0,-9239.9,-7070.0,-11557.3,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,17.0,461.818,68000.0,-1043.37,-270.0,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,17.0,460.445,68000.0,-9239.9,-7070.0,-11557.3,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,RUNNING,127.0.0.1:25399,0.005,17.0,461.818,68000.0,-1043.37,-270.0,-2470.0,250.0
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,17.0,460.445,68000.0,-9239.9,-7070.0,-11557.3,250.0
PPOTrainer_Multi_passanger_env_29773_00002,PENDING,,0.0005,,,,,,,


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2022-02-21_13-00-36
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -6570.0
  episode_reward_mean: -8966.852123422163
  episode_reward_min: -11557.30364779992
  episodes_this_iter: 16
  episodes_total: 288
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.518039574161652
          entropy_coeff: 0.0
          kl: 0.01630268313958825
          policy_loss: -0.041797921101572696
          total_loss: 2183576.217204301
          vf_explained_var: -2.6757999133038265e-07
          vf_loss: 2183576.2448252686
        model: {}
    num_agent_steps_sampled: 72000
    num_agent_steps_t

[2m[36m(PPOTrainer pid=25681)[0m 2022-02-21 13:00:43,191	INFO ppo.py:249 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=25681)[0m 2022-02-21 13:00:43,191	INFO trainer.py:790 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,18.0,480.943,72000.0,-8966.85,-6570.0,-11557.3,250.0
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,,,,,,,
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18.0,482.663,72000.0,-961.03,30.0,-2170.0,250.0




Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,18.0,480.943,72000.0,-8966.85,-6570.0,-11557.3,250.0
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,,,,,,,
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18.0,482.663,72000.0,-961.03,30.0,-2170.0,250.0


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,18.0,480.943,72000.0,-8966.85,-6570.0,-11557.3,250.0
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,,,,,,,
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18.0,482.663,72000.0,-961.03,30.0,-2170.0,250.0


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2022-02-21_13-00-54
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -6070.0
  episode_reward_mean: -8651.706228504168
  episode_reward_min: -11557.30364779992
  episodes_this_iter: 16
  episodes_total: 304
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.487947942364601
          entropy_coeff: 0.0
          kl: 0.017273389502031562
          policy_loss: -0.04819934849216733
          total_loss: 2015460.6987231183
          vf_explained_var: 4.6145531439012095e-08
          vf_loss: 2015460.7255712366
        model: {}
    num_agent_steps_sampled: 76000
    num_agent_steps_t

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,19.0,499.059,76000.0,-8651.71,-6070.0,-11557.3,250.0
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,,,,,,,
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18.0,482.663,72000.0,-961.03,30.0,-2170.0,250.0


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,19.0,499.059,76000.0,-8651.71,-6070.0,-11557.3,250.0
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,,,,,,,
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18.0,482.663,72000.0,-961.03,30.0,-2170.0,250.0


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2022-02-21_13-01-09
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -10870.0
  episode_reward_mean: -11868.237996827613
  episode_reward_min: -12770.0
  episodes_this_iter: 16
  episodes_total: 16
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.20000000000000004
          cur_lr: 0.0005000000000000002
          entropy: 4.87055839005337
          entropy_coeff: 0.0
          kl: 0.027914215034607958
          policy_loss: -0.04016768959030429
          total_loss: 6320378.97217742
          vf_explained_var: -6.003001684783607e-05
          vf_loss: 6320378.968145161
        model: {}
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
  

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,19,499.059,76000,-8651.71,-6070,-11557.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,1,20.3396,4000,-11868.2,-10870,-12770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2022-02-21_13-01-13
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -5670.0
  episode_reward_mean: -8305.313518435427
  episode_reward_min: -11557.30364779992
  episodes_this_iter: 16
  episodes_total: 320
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.434933028682586
          entropy_coeff: 0.0
          kl: 0.016697625107847433
          policy_loss: -0.05180840750315016
          total_loss: 1784159.1761592743
          vf_explained_var: -1.5548480454311575e-07
          vf_loss: 1784159.214952957
        model: {}
    num_agent_steps_sampled: 80000
    num_agent_steps_t

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,20,518.045,80000,-8305.31,-5670,-11557.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,1,20.3396,4000,-11868.2,-10870,-12770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,20,518.045,80000,-8305.31,-5670,-11557.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,1,20.3396,4000,-11868.2,-10870,-12770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,20,518.045,80000,-8305.31,-5670,-11557.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,1,20.3396,4000,-11868.2,-10870,-12770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2022-02-21_13-01-27
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -10870.0
  episode_reward_mean: -11869.215906751753
  episode_reward_min: -13170.0
  episodes_this_iter: 16
  episodes_total: 32
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.3
          cur_lr: 0.0005000000000000002
          entropy: 4.832904369087629
          entropy_coeff: 0.0
          kl: 0.030671198746752764
          policy_loss: -0.05294896494346841
          total_loss: 6003680.1211021505
          vf_explained_var: -0.0008954046234007805
          vf_loss: 6003680.167069892
        model: {}
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_s

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,21,536.921,84000,-7766.75,-5170,-11557.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,2,39.2154,8000,-11869.2,-10870,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,21,536.921,84000,-7766.75,-5170,-11557.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,2,39.2154,8000,-11869.2,-10870,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,21,536.921,84000,-7766.75,-5170,-11557.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,2,39.2154,8000,-11869.2,-10870,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2022-02-21_13-01-46
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -9970.0
  episode_reward_mean: -11695.562377175002
  episode_reward_min: -13170.0
  episodes_this_iter: 16
  episodes_total: 48
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 0.0005000000000000002
          entropy: 4.814029493126818
          entropy_coeff: 0.0
          kl: 0.027024842138969572
          policy_loss: -0.046471896659462684
          total_loss: 5147313.115658602
          vf_explained_var: -5.24841329102875e-07
          vf_loss: 5147313.138776882
        model: {}
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,21,536.921,84000,-7766.75,-5170,-11557.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,3,58.0656,12000,-11695.6,-9970,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2022-02-21_13-01-51
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -4555.9935740759765
  episode_reward_mean: -7273.731252792304
  episode_reward_min: -11557.30364779992
  episodes_this_iter: 16
  episodes_total: 352
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.365021168288364
          entropy_coeff: 0.0
          kl: 0.016573379517940058
          policy_loss: -0.045444537505948096
          total_loss: 942529.1853158602
          vf_explained_var: -2.0047669769615255e-07
          vf_loss: 942529.221656586
        model: {}
    num_agent_steps_sampled: 88000
    num_ag

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,22,555.923,88000,-7273.73,-4555.99,-11557.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,3,58.0656,12000,-11695.6,-9970.0,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,22,555.923,88000,-7273.73,-4555.99,-11557.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,3,58.0656,12000,-11695.6,-9970.0,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,22,555.923,88000,-7273.73,-4555.99,-11557.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,3,58.0656,12000,-11695.6,-9970.0,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2022-02-21_13-02-05
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -7758.52635886175
  episode_reward_mean: -11430.250109159757
  episode_reward_min: -13170.0
  episodes_this_iter: 16
  episodes_total: 64
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.0005000000000000002
          entropy: 4.788195229602117
          entropy_coeff: 0.0
          kl: 0.02574447946951933
          policy_loss: -0.05594290628658748
          total_loss: 4213269.0927419355
          vf_explained_var: -7.196780174009262e-07
          vf_loss: 4213269.123252688
        model: {}
    num_agent_steps_sampled: 16000
    num_agent_steps_trai

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,22,555.923,88000,-7273.73,-4555.99,-11557.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,4,76.8262,16000,-11430.3,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2022-02-21_13-02-10
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -4270.0
  episode_reward_mean: -6735.155102991141
  episode_reward_min: -10256.321731561053
  episodes_this_iter: 16
  episodes_total: 368
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.354971241181897
          entropy_coeff: 0.0
          kl: 0.016545986951179863
          policy_loss: -0.04734133812048102
          total_loss: 774882.0995547716
          vf_explained_var: -8.331832065377184e-10
          vf_loss: 774882.1339969758
        model: {}
    num_agent_steps_sampled: 92000
    num_agent_steps_tr

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,23,574.541,92000,-6735.16,-4270.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,4,76.8262,16000,-11430.3,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,23,574.541,92000,-6735.16,-4270.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,4,76.8262,16000,-11430.3,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2022-02-21_13-02-24
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -7758.52635886175
  episode_reward_mean: -11308.731902269854
  episode_reward_min: -13170.0
  episodes_this_iter: 16
  episodes_total: 80
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0124999999999997
          cur_lr: 0.0005000000000000002
          entropy: 4.774335400776197
          entropy_coeff: 0.0
          kl: 0.020682428362915218
          policy_loss: -0.05227745206105292
          total_loss: 4159770.404973118
          vf_explained_var: -0.00032765923007842035
          vf_loss: 4159770.4443548387
        model: {}
    num_agent_steps_sampled: 20000
    num_agent_steps_tr

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,23,574.541,92000,-6735.16,-4270.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,5,96.0016,20000,-11308.7,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2022-02-21_13-02-29
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -3370.0
  episode_reward_mean: -6191.606838160253
  episode_reward_min: -10256.321731561053
  episodes_this_iter: 16
  episodes_total: 384
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.3296650430207615
          entropy_coeff: 0.0
          kl: 0.017204630570831114
          policy_loss: -0.046402392647559604
          total_loss: 573611.8708081318
          vf_explained_var: 2.5892770418556786e-08
          vf_loss: 573611.9056787634
        model: {}
    num_agent_steps_sampled: 96000
    num_agent_steps_

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,24,593.684,96000,-6191.61,-3370.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,5,96.0016,20000,-11308.7,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,24,593.684,96000,-6191.61,-3370.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,5,96.0016,20000,-11308.7,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,24,593.684,96000,-6191.61,-3370.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,5,96.0016,20000,-11308.7,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,24,593.684,96000,-6191.61,-3370.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,5,96.0016,20000,-11308.7,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,24,593.684,96000,-6191.61,-3370.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,5,96.0016,20000,-11308.7,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2022-02-21_13-02-52
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -7758.52635886175
  episode_reward_mean: -11250.966523363444
  episode_reward_min: -13170.0
  episodes_this_iter: 16
  episodes_total: 96
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.757538049964495
          entropy_coeff: 0.0
          kl: 0.016854652457563537
          policy_loss: -0.052521509375242935
          total_loss: 3974763.5690860213
          vf_explained_var: -7.624267249978998e-07
          vf_loss: 3974763.614784946
        model: {}
    num_agent_steps_sampled: 24000
    num_agent_steps_tr

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,24,593.684,96000,-6191.61,-3370.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,6,123.812,24000,-11251.0,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2022-02-21_13-03-00
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -3370.0
  episode_reward_mean: -5779.09764527687
  episode_reward_min: -10256.321731561053
  episodes_this_iter: 16
  episodes_total: 400
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.321843763577041
          entropy_coeff: 0.0
          kl: 0.016120068856455408
          policy_loss: -0.047998114630720144
          total_loss: 697952.6835433468
          vf_explained_var: -8.210059135190902e-08
          vf_loss: 697952.7219086022
        model: {}
    num_agent_steps_sampled: 100000
    num_agent_steps_

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,25,624.487,100000,-5779.1,-3370.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,6,123.812,24000,-11251.0,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,25,624.487,100000,-5779.1,-3370.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,6,123.812,24000,-11251.0,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,25,624.487,100000,-5779.1,-3370.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,6,123.812,24000,-11251.0,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,25,624.487,100000,-5779.1,-3370.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,6,123.812,24000,-11251.0,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2022-02-21_13-03-20
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -7758.52635886175
  episode_reward_mean: -11027.31025711593
  episode_reward_min: -13170.0
  episodes_this_iter: 16
  episodes_total: 112
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.748693570783061
          entropy_coeff: 0.0
          kl: 0.015934643630220163
          policy_loss: -0.03893899236184855
          total_loss: 3277274.2731182794
          vf_explained_var: 1.5811253619450396e-07
          vf_loss: 3277274.291129032
        model: {}
    num_agent_steps_sampled: 28000
    num_agent_steps_tra

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,25,624.487,100000,-5779.1,-3370.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,7,151.217,28000,-11027.3,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2022-02-21_13-03-27
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -3070.0
  episode_reward_mean: -5318.914933085609
  episode_reward_min: -10256.321731561053
  episodes_this_iter: 16
  episodes_total: 416
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.261705174497378
          entropy_coeff: 0.0
          kl: 0.01584919664361061
          policy_loss: -0.04643643266491351
          total_loss: 429554.69779905915
          vf_explained_var: -1.5574116860666583e-08
          vf_loss: 429554.73252688174
        model: {}
    num_agent_steps_sampled: 104000
    num_agent_step

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,26,651.75,104000,-5318.91,-3070.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,7,151.217,28000,-11027.3,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,26,651.75,104000,-5318.91,-3070.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,7,151.217,28000,-11027.3,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,26,651.75,104000,-5318.91,-3070.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,7,151.217,28000,-11027.3,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,26,651.75,104000,-5318.91,-3070.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,7,151.217,28000,-11027.3,-7758.53,-13170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2022-02-21_13-03-47
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -7758.52635886175
  episode_reward_mean: -10746.967273723989
  episode_reward_min: -13070.0
  episodes_this_iter: 16
  episodes_total: 128
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.713769278475033
          entropy_coeff: 0.0
          kl: 0.017112961656230694
          policy_loss: -0.05542904165252963
          total_loss: 2978449.0101478496
          vf_explained_var: 4.600453120405956e-07
          vf_loss: 2978449.0363575267
        model: {}
    num_agent_steps_sampled: 32000
    num_agent_steps_tr

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,26,651.75,104000,-5318.91,-3070.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,8,178.708,32000,-10747.0,-7758.53,-13070.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2022-02-21_13-03-55
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -2470.0
  episode_reward_mean: -4946.119646335816
  episode_reward_min: -10256.321731561053
  episodes_this_iter: 16
  episodes_total: 432
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.234764089891987
          entropy_coeff: 0.0
          kl: 0.018367416507142974
          policy_loss: -0.05441517489461569
          total_loss: 422114.6568611391
          vf_explained_var: 8.024195189117103e-08
          vf_loss: 422114.697828461
        model: {}
    num_agent_steps_sampled: 108000
    num_agent_steps_tr

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,27,679.247,108000,-4946.12,-2470.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,8,178.708,32000,-10747.0,-7758.53,-13070.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,27,679.247,108000,-4946.12,-2470.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,8,178.708,32000,-10747.0,-7758.53,-13070.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,27,679.247,108000,-4946.12,-2470.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,8,178.708,32000,-10747.0,-7758.53,-13070.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,27,679.247,108000,-4946.12,-2470.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,8,178.708,32000,-10747.0,-7758.53,-13070.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2022-02-21_13-04-15
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -7758.52635886175
  episode_reward_mean: -10503.908507945174
  episode_reward_min: -12370.0
  episodes_this_iter: 16
  episodes_total: 144
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.719023250251688
          entropy_coeff: 0.0
          kl: 0.015801711921095547
          policy_loss: -0.04341996361611671
          total_loss: 2517382.5479166666
          vf_explained_var: -7.1046813841789e-06
          vf_loss: 2517382.567405914
        model: {}
    num_agent_steps_sampled: 36000
    num_agent_steps_trai

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,27,679.247,108000,-4946.12,-2470.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,9,206.278,36000,-10503.9,-7758.53,-12370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,27,679.247,108000,-4946.12,-2470.0,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,9,206.278,36000,-10503.9,-7758.53,-12370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2022-02-21_13-04-22
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -2155.3254585892396
  episode_reward_mean: -4572.696796015027
  episode_reward_min: -10256.321731561053
  episodes_this_iter: 16
  episodes_total: 448
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.1679185769891225
          entropy_coeff: 0.0
          kl: 0.01690846574426971
          policy_loss: -0.05198359995209161
          total_loss: 275951.44620505715
          vf_explained_var: -1.6663664130754369e-09
          vf_loss: 275951.4866221438
        model: {}
    num_agent_steps_sampled: 112000
    nu

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,28,706.545,112000,-4572.7,-2155.33,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,9,206.278,36000,-10503.9,-7758.53,-12370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,28,706.545,112000,-4572.7,-2155.33,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,9,206.278,36000,-10503.9,-7758.53,-12370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,28,706.545,112000,-4572.7,-2155.33,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,9,206.278,36000,-10503.9,-7758.53,-12370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2022-02-21_13-04-42
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -8349.879958445905
  episode_reward_mean: -10318.637990644142
  episode_reward_min: -12370.0
  episodes_this_iter: 16
  episodes_total: 160
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.698493873944846
          entropy_coeff: 0.0
          kl: 0.01674968832994173
          policy_loss: -0.04784960851213464
          total_loss: 2120121.414112903
          vf_explained_var: -6.557856836626606e-06
          vf_loss: 2120121.432594086
        model: {}
    num_agent_steps_sampled: 40000
    num_agent_steps_tra

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,28,706.545,112000,-4572.7,-2155.33,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,10,233.283,40000,-10318.6,-8349.88,-12370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,28,706.545,112000,-4572.7,-2155.33,-10256.3,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,10,233.283,40000,-10318.6,-8349.88,-12370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2022-02-21_13-04-49
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -1870.0
  episode_reward_mean: -4108.917559623913
  episode_reward_min: -9070.0
  episodes_this_iter: 16
  episodes_total: 464
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.070921419769205
          entropy_coeff: 0.0
          kl: 0.016370202982600494
          policy_loss: -0.03596906814604036
          total_loss: 118026.12779186618
          vf_explained_var: -3.8582791564285116e-08
          vf_loss: 118026.15255087576
        model: {}
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,29,733.499,116000,-4108.92,-1870.0,-9070,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,10,233.283,40000,-10318.6,-8349.88,-12370,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,29,733.499,116000,-4108.92,-1870.0,-9070,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,10,233.283,40000,-10318.6,-8349.88,-12370,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,29,733.499,116000,-4108.92,-1870.0,-9070,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,10,233.283,40000,-10318.6,-8349.88,-12370,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,29,733.499,116000,-4108.92,-1870.0,-9070,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,10,233.283,40000,-10318.6,-8349.88,-12370,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2022-02-21_13-05-09
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -7270.0
  episode_reward_mean: -10124.50407873357
  episode_reward_min: -12370.0
  episodes_this_iter: 16
  episodes_total: 176
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.678201540054814
          entropy_coeff: 0.0
          kl: 0.017258456558985807
          policy_loss: -0.0491889461474393
          total_loss: 1859251.796001344
          vf_explained_var: -9.829639106668452e-07
          vf_loss: 1859251.8177419354
        model: {}
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,29,733.499,116000,-4108.92,-1870,-9070,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,11,260.473,44000,-10124.5,-7270,-12370,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2022-02-21_13-05-17
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -1153.9905294760501
  episode_reward_mean: -3848.8912185496806
  episode_reward_min: -9070.0
  episodes_this_iter: 16
  episodes_total: 480
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 0.00010000000000000003
          entropy: 4.121742079591239
          entropy_coeff: 0.0
          kl: 0.022709705134303947
          policy_loss: -0.06850932348189094
          total_loss: 279343.90055968583
          vf_explained_var: -3.268641810263357e-09
          vf_loss: 279343.95140393986
        model: {}
    num_agent_steps_sampled: 120000
    num_agent_ste

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,30,760.562,120000,-3848.89,-1153.99,-9070,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,11,260.473,44000,-10124.5,-7270.0,-12370,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,30,760.562,120000,-3848.89,-1153.99,-9070,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,11,260.473,44000,-10124.5,-7270.0,-12370,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,30,760.562,120000,-3848.89,-1153.99,-9070,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,11,260.473,44000,-10124.5,-7270.0,-12370,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,30,760.562,120000,-3848.89,-1153.99,-9070,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,11,260.473,44000,-10124.5,-7270.0,-12370,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2022-02-21_13-05-37
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -7270.0
  episode_reward_mean: -9922.397531432402
  episode_reward_min: -11870.0
  episodes_this_iter: 16
  episodes_total: 192
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.657843020654494
          entropy_coeff: 0.0
          kl: 0.01748976412635938
          policy_loss: -0.05648728378632817
          total_loss: 1730356.8773017472
          vf_explained_var: 4.819003484582388e-07
          vf_loss: 1730356.9092405913
        model: {}
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,30,760.562,120000,-3848.89,-1153.99,-9070,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,12,287.761,48000,-9922.4,-7270.0,-11870,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2022-02-21_13-05-44
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -1070.0
  episode_reward_mean: -3295.6070985658794
  episode_reward_min: -8570.0
  episodes_this_iter: 16
  episodes_total: 496
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0124999999999997
          cur_lr: 0.00010000000000000003
          entropy: 3.9624761409656974
          entropy_coeff: 0.0
          kl: 0.01324700460457122
          policy_loss: -0.03472428489851976
          total_loss: 69515.61948714718
          vf_explained_var: 5.304383975203319e-05
          vf_loss: 69515.6405451949
        model: {}
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 1240

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,31,787.803,124000,-3295.61,-1070,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,12,287.761,48000,-9922.4,-7270,-11870,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,31,787.803,124000,-3295.61,-1070,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,12,287.761,48000,-9922.4,-7270,-11870,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,31,787.803,124000,-3295.61,-1070,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,12,287.761,48000,-9922.4,-7270,-11870,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,31,787.803,124000,-3295.61,-1070,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,12,287.761,48000,-9922.4,-7270,-11870,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2022-02-21_13-06-04
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -7270.0
  episode_reward_mean: -9783.710931534968
  episode_reward_min: -11770.0
  episodes_this_iter: 16
  episodes_total: 208
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.6665177047893565
          entropy_coeff: 0.0
          kl: 0.017023639485365293
          policy_loss: -0.05449730251059537
          total_loss: 1751644.720329301
          vf_explained_var: -1.1799156024891843e-07
          vf_loss: 1751644.7486223117
        model: {}
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,31,787.803,124000,-3295.61,-1070,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,13,314.853,52000,-9783.71,-7270,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2022-02-21_13-06-11
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -1070.0
  episode_reward_mean: -2876.722789328849
  episode_reward_min: -8570.0
  episodes_this_iter: 16
  episodes_total: 512
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0124999999999997
          cur_lr: 0.00010000000000000003
          entropy: 3.967069882731284
          entropy_coeff: 0.0
          kl: 0.012372362059879603
          policy_loss: -0.0330790683056318
          total_loss: 71387.49329427084
          vf_explained_var: -2.4807837701612902e-05
          vf_loss: 71387.51367292507
        model: {}
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,32,814.782,128000,-2876.72,-1070,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,13,314.853,52000,-9783.71,-7270,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,32,814.782,128000,-2876.72,-1070,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,13,314.853,52000,-9783.71,-7270,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,32,814.782,128000,-2876.72,-1070,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,13,314.853,52000,-9783.71,-7270,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,32,814.782,128000,-2876.72,-1070,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,13,314.853,52000,-9783.71,-7270,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2022-02-21_13-06-31
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -7257.599463081451
  episode_reward_mean: -9642.518618961836
  episode_reward_min: -11770.0
  episodes_this_iter: 16
  episodes_total: 224
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.606092965731057
          entropy_coeff: 0.0
          kl: 0.01773123255615203
          policy_loss: -0.05443539840459664
          total_loss: 1309330.796001344
          vf_explained_var: -1.949007793139386e-07
          vf_loss: 1309330.8297211023
        model: {}
    num_agent_steps_sampled: 56000
    num_agent_steps_tra

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,32,814.782,128000,-2876.72,-1070.0,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,14,341.822,56000,-9642.52,-7257.6,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2022-02-21_13-06-38
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -656.9533993972063
  episode_reward_mean: -2502.1650607606275
  episode_reward_min: -8570.0
  episodes_this_iter: 16
  episodes_total: 528
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0124999999999997
          cur_lr: 0.00010000000000000003
          entropy: 3.917179880860031
          entropy_coeff: 0.0
          kl: 0.012506167437064378
          policy_loss: -0.061346830221353676
          total_loss: 133919.4907153058
          vf_explained_var: -0.0003710703824156074
          vf_loss: 133919.53870547714
        model: {}
    num_agent_steps_sampled: 132000
    num_agent_step

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,33,841.674,132000,-2502.17,-656.953,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,14,341.822,56000,-9642.52,-7257.6,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,33,841.674,132000,-2502.17,-656.953,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,14,341.822,56000,-9642.52,-7257.6,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,33,841.674,132000,-2502.17,-656.953,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,14,341.822,56000,-9642.52,-7257.6,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,33,841.674,132000,-2502.17,-656.953,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,14,341.822,56000,-9642.52,-7257.6,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2022-02-21_13-06-58
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -7257.599463081451
  episode_reward_mean: -9511.48131946685
  episode_reward_min: -11770.0
  episodes_this_iter: 16
  episodes_total: 240
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.585333093007406
          entropy_coeff: 0.0
          kl: 0.018360198278715226
          policy_loss: -0.057549039502778364
          total_loss: 1356154.2190356182
          vf_explained_var: -6.061728282641339e-07
          vf_loss: 1356154.2458501344
        model: {}
    num_agent_steps_sampled: 60000
    num_agent_steps_t

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,33,841.674,132000,-2502.17,-656.953,-8570,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,15,368.74,60000,-9511.48,-7257.6,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2022-02-21_13-07-05
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -470.0
  episode_reward_mean: -2029.9143093650155
  episode_reward_min: -8370.0
  episodes_this_iter: 16
  episodes_total: 544
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0124999999999997
          cur_lr: 0.00010000000000000003
          entropy: 3.847557826708722
          entropy_coeff: 0.0
          kl: 0.011432240982086813
          policy_loss: -0.0290534803433524
          total_loss: 93071.50073084678
          vf_explained_var: -0.05812591198951967
          vf_loss: 93071.51838877688
        model: {}
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,34,868.535,136000,-2029.91,-470.0,-8370,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,15,368.74,60000,-9511.48,-7257.6,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,34,868.535,136000,-2029.91,-470.0,-8370,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,15,368.74,60000,-9511.48,-7257.6,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,34,868.535,136000,-2029.91,-470.0,-8370,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,15,368.74,60000,-9511.48,-7257.6,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,34,868.535,136000,-2029.91,-470.0,-8370,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,15,368.74,60000,-9511.48,-7257.6,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2022-02-21_13-07-25
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -6553.9169528090315
  episode_reward_mean: -9386.734550754474
  episode_reward_min: -11770.0
  episodes_this_iter: 16
  episodes_total: 256
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.574436315926173
          entropy_coeff: 0.0
          kl: 0.01740132135321743
          policy_loss: -0.04903997275427545
          total_loss: 1098458.4680443548
          vf_explained_var: 8.58819612892725e-09
          vf_loss: 1098458.4928931452
        model: {}
    num_agent_steps_sampled: 64000
    num_agent_steps_tra

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,34,868.535,136000,-2029.91,-470.0,-8370,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,16,395.497,64000,-9386.73,-6553.92,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,34,868.535,136000,-2029.91,-470.0,-8370,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,16,395.497,64000,-9386.73,-6553.92,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2022-02-21_13-07-32
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -370.0
  episode_reward_mean: -1752.99129023542
  episode_reward_min: -8370.0
  episodes_this_iter: 16
  episodes_total: 560
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0124999999999997
          cur_lr: 0.00010000000000000003
          entropy: 3.8520350792074716
          entropy_coeff: 0.0
          kl: 0.011104066490149424
          policy_loss: -0.04739707949100643
          total_loss: 140000.70273542506
          vf_explained_var: -0.0333684697587003
          vf_loss: 140000.7398342994
        model: {}
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,35,895.399,140000,-1752.99,-370.0,-8370,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,16,395.497,64000,-9386.73,-6553.92,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,35,895.399,140000,-1752.99,-370.0,-8370,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,16,395.497,64000,-9386.73,-6553.92,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,35,895.399,140000,-1752.99,-370.0,-8370,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,16,395.497,64000,-9386.73,-6553.92,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2022-02-21_13-07-52
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -6553.9169528090315
  episode_reward_mean: -9277.622733895056
  episode_reward_min: -11770.0
  episodes_this_iter: 16
  episodes_total: 272
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.565599974252844
          entropy_coeff: 0.0
          kl: 0.018100345846078964
          policy_loss: -0.057177724893034625
          total_loss: 926825.8325436828
          vf_explained_var: 1.163431393202915e-05
          vf_loss: 926825.8629872312
        model: {}
    num_agent_steps_sampled: 68000
    num_agent_steps_tr

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,35,895.399,140000,-1752.99,-370.0,-8370,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,17,422.298,68000,-9277.62,-6553.92,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,35,895.399,140000,-1752.99,-370.0,-8370,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,17,422.298,68000,-9277.62,-6553.92,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2022-02-21_13-07-59
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -370.0
  episode_reward_mean: -1393.5185181931886
  episode_reward_min: -5470.0
  episodes_this_iter: 16
  episodes_total: 576
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0124999999999997
          cur_lr: 0.00010000000000000003
          entropy: 3.8642129626325383
          entropy_coeff: 0.0
          kl: 0.01316173657565013
          policy_loss: -0.04658213659359883
          total_loss: 90611.96864184308
          vf_explained_var: -0.0875064323025365
          vf_loss: 90612.00186596942
        model: {}
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,36,922.009,144000,-1393.52,-370.0,-5470,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,17,422.298,68000,-9277.62,-6553.92,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,36,922.009,144000,-1393.52,-370.0,-5470,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,17,422.298,68000,-9277.62,-6553.92,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,36,922.009,144000,-1393.52,-370.0,-5470,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,17,422.298,68000,-9277.62,-6553.92,-11770,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2022-02-21_13-08-18
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -6553.9169528090315
  episode_reward_mean: -9164.014611883815
  episode_reward_min: -11170.0
  episodes_this_iter: 16
  episodes_total: 288
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.5408524959318095
          entropy_coeff: 0.0
          kl: 0.018535132096872204
          policy_loss: -0.06215636521798148
          total_loss: 949754.2070060484
          vf_explained_var: 8.670758175593551e-05
          vf_loss: 949754.2435819892
        model: {}
    num_agent_steps_sampled: 72000
    num_agent_steps_tr

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,36,922.009,144000,-1393.52,-370.0,-5470,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,18,449.067,72000,-9164.01,-6553.92,-11170,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2022-02-21_13-08-25
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -270.0
  episode_reward_mean: -1196.2485218298127
  episode_reward_min: -5470.0
  episodes_this_iter: 16
  episodes_total: 592
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0124999999999997
          cur_lr: 0.00010000000000000003
          entropy: 3.828672974596741
          entropy_coeff: 0.0
          kl: 0.012359504073238494
          policy_loss: -0.04464078058278368
          total_loss: 104567.67865738407
          vf_explained_var: -0.05048088168585172
          vf_loss: 104567.71102255545
        model: {}
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,37,948.581,148000,-1196.25,-270.0,-5470,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,18,449.067,72000,-9164.01,-6553.92,-11170,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,37,948.581,148000,-1196.25,-270.0,-5470,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,18,449.067,72000,-9164.01,-6553.92,-11170,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,37,948.581,148000,-1196.25,-270.0,-5470,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,18,449.067,72000,-9164.01,-6553.92,-11170,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,37,948.581,148000,-1196.25,-270.0,-5470,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,18,449.067,72000,-9164.01,-6553.92,-11170,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2022-02-21_13-08-45
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -6553.9169528090315
  episode_reward_mean: -8942.725071519837
  episode_reward_min: -11170.0
  episodes_this_iter: 16
  episodes_total: 304
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.538625269551431
          entropy_coeff: 0.0
          kl: 0.018227092932570473
          policy_loss: -0.05133549960790783
          total_loss: 837365.4209509408
          vf_explained_var: 6.973358892625378e-06
          vf_loss: 837365.4467741936
        model: {}
    num_agent_steps_sampled: 76000
    num_agent_steps_tra

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,37,948.581,148000,-1196.25,-270.0,-5470,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,19,475.794,76000,-8942.73,-6553.92,-11170,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,37,948.581,148000,-1196.25,-270.0,-5470,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,19,475.794,76000,-8942.73,-6553.92,-11170,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2022-02-21_13-08-52
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -170.0
  episode_reward_mean: -1076.104223996847
  episode_reward_min: -7156.571633219015
  episodes_this_iter: 16
  episodes_total: 608
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0124999999999997
          cur_lr: 0.00010000000000000003
          entropy: 3.797151222280277
          entropy_coeff: 0.0
          kl: 0.009941916805964934
          policy_loss: -0.05109694250208396
          total_loss: 294840.00620694726
          vf_explained_var: -0.06802540761168285
          vf_loss: 294840.0431756132
        model: {}
    num_agent_steps_sampled: 152000
    num_agent_steps_tra

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,38,975.34,152000,-1076.1,-170.0,-7156.57,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,19,475.794,76000,-8942.73,-6553.92,-11170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,38,975.34,152000,-1076.1,-170.0,-7156.57,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,19,475.794,76000,-8942.73,-6553.92,-11170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,38,975.34,152000,-1076.1,-170.0,-7156.57,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,19,475.794,76000,-8942.73,-6553.92,-11170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2022-02-21_13-09-12
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -5670.0
  episode_reward_mean: -8785.987696726017
  episode_reward_min: -10670.0
  episodes_this_iter: 16
  episodes_total: 320
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.4869422066596245
          entropy_coeff: 0.0
          kl: 0.018919242573579926
          policy_loss: -0.05916080642141081
          total_loss: 774709.5368489583
          vf_explained_var: -0.0004005782065852996
          vf_loss: 774709.5672925067
        model: {}
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 8000

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,38,975.34,152000,-1076.1,-170,-7156.57,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,20,502.82,80000,-8785.99,-5670,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,38,975.34,152000,-1076.1,-170,-7156.57,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,20,502.82,80000,-8785.99,-5670,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2022-02-21_13-09-19
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -70.0
  episode_reward_mean: -898.6323575966545
  episode_reward_min: -7156.571633219015
  episodes_this_iter: 16
  episodes_total: 624
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0124999999999997
          cur_lr: 0.00010000000000000003
          entropy: 3.7248761684663836
          entropy_coeff: 0.0
          kl: 0.013591617663700924
          policy_loss: -0.047854433156630044
          total_loss: 101503.81453319262
          vf_explained_var: -0.12388514543092378
          vf_loss: 101503.84911794355
        model: {}
    num_agent_steps_sampled: 156000
    num_agent_steps_t

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,39,1001.98,156000,-898.632,-70,-7156.57,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,20,502.82,80000,-8785.99,-5670,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,39,1001.98,156000,-898.632,-70,-7156.57,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,20,502.82,80000,-8785.99,-5670,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,39,1001.98,156000,-898.632,-70,-7156.57,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,20,502.82,80000,-8785.99,-5670,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2022-02-21_13-09-39
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -5670.0
  episode_reward_mean: -8625.98869004297
  episode_reward_min: -10670.0
  episodes_this_iter: 16
  episodes_total: 336
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.48559356248507
          entropy_coeff: 0.0
          kl: 0.017291182153360295
          policy_loss: -0.04673430468806977
          total_loss: 650315.1519321237
          vf_explained_var: 0.00011561705220130182
          vf_loss: 650315.1737231183
        model: {}
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
 

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,39,1001.98,156000,-898.632,-70,-7156.57,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,21,529.428,84000,-8625.99,-5670,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00001,RUNNING,127.0.0.1:25400,0.0001,39,1001.98,156000,-898.632,-70,-7156.57,250
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,21,529.428,84000,-8625.99,-5670,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30,-2170.0,250


Result for PPOTrainer_Multi_passanger_env_29773_00001:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2022-02-21_13-09-46
  done: true
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: 54.808392297728304
  episode_reward_mean: -744.4578166777801
  episode_reward_min: -7156.571633219015
  episodes_this_iter: 16
  episodes_total: 640
  experiment_id: c247e328cf5146c2ae9a625fce22ec2a
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0124999999999997
          cur_lr: 0.00010000000000000003
          entropy: 3.727079496845122
          entropy_coeff: 0.0
          kl: 0.013138279300362448
          policy_loss: -0.04766644834871254
          total_loss: 56142.1547515541
          vf_explained_var: -0.09264480727975087
          vf_loss: 56142.18910345262
        model: {}
    num_agent_steps_sampled: 160000
    num_agent_



Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,21,529.428,84000,-8625.99,-5670.0,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,21,529.428,84000,-8625.99,-5670.0,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,21,529.428,84000,-8625.99,-5670.0,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2022-02-21_13-10-03
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -5670.0
  episode_reward_mean: -8391.958555678668
  episode_reward_min: -10670.0
  episodes_this_iter: 16
  episodes_total: 352
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.481225303424302
          entropy_coeff: 0.0
          kl: 0.01947517102499089
          policy_loss: -0.06338289835620471
          total_loss: 603479.802125336
          vf_explained_var: 9.90856078363234e-05
          vf_loss: 603479.8331989248
        model: {}
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
   

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,22,553.463,88000,-8391.96,-5670.0,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,22,553.463,88000,-8391.96,-5670.0,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,22,553.463,88000,-8391.96,-5670.0,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,22,553.463,88000,-8391.96,-5670.0,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2022-02-21_13-10-26
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -5670.0
  episode_reward_mean: -8188.653208363152
  episode_reward_min: -10670.0
  episodes_this_iter: 16
  episodes_total: 368
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.441412070489699
          entropy_coeff: 0.0
          kl: 0.019335798778730998
          policy_loss: -0.06994532605793367
          total_loss: 612864.0596679688
          vf_explained_var: 0.0007851550655980264
          vf_loss: 612864.0995988743
        model: {}
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,23,575.977,92000,-8188.65,-5670.0,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,23,575.977,92000,-8188.65,-5670.0,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,23,575.977,92000,-8188.65,-5670.0,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,23,575.977,92000,-8188.65,-5670.0,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,23,575.977,92000,-8188.65,-5670.0,-10670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2022-02-21_13-10-48
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -5344.234061302195
  episode_reward_mean: -7891.007818084649
  episode_reward_min: -9635.579076585249
  episodes_this_iter: 16
  episodes_total: 384
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.3899435309953585
          entropy_coeff: 0.0
          kl: 0.018254192069401688
          policy_loss: -0.05757627365009118
          total_loss: 647278.8746639785
          vf_explained_var: -0.004747999932176323
          vf_loss: 647278.903251008
        model: {}
    num_agent_steps_sampled: 96000
    num_agent_

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,24,598.639,96000,-7891.01,-5344.23,-9635.58,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,24,598.639,96000,-7891.01,-5344.23,-9635.58,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,24,598.639,96000,-7891.01,-5344.23,-9635.58,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,24,598.639,96000,-7891.01,-5344.23,-9635.58,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2022-02-21_13-11-11
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -5344.234061302195
  episode_reward_mean: -7548.114236685381
  episode_reward_min: -9635.579076585249
  episodes_this_iter: 16
  episodes_total: 400
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.368828961669758
          entropy_coeff: 0.0
          kl: 0.017733791687483864
          policy_loss: -0.05061225915998621
          total_loss: 589255.9512359292
          vf_explained_var: -0.000467277919092486
          vf_loss: 589255.976234879
        model: {}
    num_agent_steps_sampled: 100000
    num_agent

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,25,621.257,100000,-7548.11,-5344.23,-9635.58,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,25,621.257,100000,-7548.11,-5344.23,-9635.58,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,25,621.257,100000,-7548.11,-5344.23,-9635.58,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,25,621.257,100000,-7548.11,-5344.23,-9635.58,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2022-02-21_13-11-34
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -5344.234061302195
  episode_reward_mean: -7254.845258209375
  episode_reward_min: -9555.993574075976
  episodes_this_iter: 16
  episodes_total: 416
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.353245290633171
          entropy_coeff: 0.0
          kl: 0.019419030856492533
          policy_loss: -0.07235378410907523
          total_loss: 591068.6398605511
          vf_explained_var: -0.002648585522046653
          vf_loss: 591068.6828797043
        model: {}
    num_agent_steps_sampled: 104000
    num_agen

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,26,643.893,104000,-7254.85,-5344.23,-9555.99,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,26,643.893,104000,-7254.85,-5344.23,-9555.99,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,26,643.893,104000,-7254.85,-5344.23,-9555.99,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,26,643.893,104000,-7254.85,-5344.23,-9555.99,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,26,643.893,104000,-7254.85,-5344.23,-9555.99,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2022-02-21_13-11-57
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -4747.026407662436
  episode_reward_mean: -6809.58483514589
  episode_reward_min: -9370.0
  episodes_this_iter: 16
  episodes_total: 432
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.312058736431983
          entropy_coeff: 0.0
          kl: 0.018243263498372818
          policy_loss: -0.05671233940689314
          total_loss: 545106.8910114247
          vf_explained_var: -0.004497447962402016
          vf_loss: 545106.9220598119
        model: {}
    num_agent_steps_sampled: 108000
    num_agent_steps_trai

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,27,666.824,108000,-6809.58,-4747.03,-9370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,27,666.824,108000,-6809.58,-4747.03,-9370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,27,666.824,108000,-6809.58,-4747.03,-9370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,27,666.824,108000,-6809.58,-4747.03,-9370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2022-02-21_13-12-20
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -4257.273542420976
  episode_reward_mean: -6512.181222341574
  episode_reward_min: -8770.0
  episodes_this_iter: 16
  episodes_total: 448
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.275377669385684
          entropy_coeff: 0.0
          kl: 0.01781515712314072
          policy_loss: -0.058786036081171486
          total_loss: 438651.49602234544
          vf_explained_var: -0.0008500777905987155
          vf_loss: 438651.52657615085
        model: {}
    num_agent_steps_sampled: 112000
    num_agent_steps_

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,28,689.738,112000,-6512.18,-4257.27,-8770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,28,689.738,112000,-6512.18,-4257.27,-8770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,28,689.738,112000,-6512.18,-4257.27,-8770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,28,689.738,112000,-6512.18,-4257.27,-8770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,28,689.738,112000,-6512.18,-4257.27,-8770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2022-02-21_13-12-43
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -3970.0
  episode_reward_mean: -6136.100420593865
  episode_reward_min: -8470.0
  episodes_this_iter: 16
  episodes_total: 464
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.274542570626864
          entropy_coeff: 0.0
          kl: 0.018934484885207097
          policy_loss: -0.07013942608709937
          total_loss: 493528.42777217744
          vf_explained_var: 0.001640990408517981
          vf_loss: 493528.4695081485
        model: {}
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 11600

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,29,712.588,116000,-6136.1,-3970.0,-8470.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,29,712.588,116000,-6136.1,-3970.0,-8470.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,29,712.588,116000,-6136.1,-3970.0,-8470.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,29,712.588,116000,-6136.1,-3970.0,-8470.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2022-02-21_13-13-05
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -3870.0
  episode_reward_mean: -5843.356748910229
  episode_reward_min: -8170.0
  episodes_this_iter: 16
  episodes_total: 480
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.253362357231879
          entropy_coeff: 0.0
          kl: 0.019384194359547517
          policy_loss: -0.06322666244000517
          total_loss: 390824.5299626176
          vf_explained_var: -0.009680571543273106
          vf_loss: 390824.5650222614
        model: {}
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 12000

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,30,735.363,120000,-5843.36,-3870.0,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,30,735.363,120000,-5843.36,-3870.0,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,30,735.363,120000,-5843.36,-3870.0,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,30,735.363,120000,-5843.36,-3870.0,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2022-02-21_13-13-28
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -3255.271675965764
  episode_reward_mean: -5510.411582609675
  episode_reward_min: -8170.0
  episodes_this_iter: 16
  episodes_total: 496
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.182882900135492
          entropy_coeff: 0.0
          kl: 0.016717978833982825
          policy_loss: -0.04588283806418379
          total_loss: 386145.616061828
          vf_explained_var: -0.004877329193135743
          vf_loss: 386145.6360719086
        model: {}
    num_agent_steps_sampled: 124000
    num_agent_steps_trai

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,31,758.073,124000,-5510.41,-3255.27,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,31,758.073,124000,-5510.41,-3255.27,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,31,758.073,124000,-5510.41,-3255.27,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,31,758.073,124000,-5510.41,-3255.27,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,31,758.073,124000,-5510.41,-3255.27,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2022-02-21_13-13-51
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -3255.271675965764
  episode_reward_mean: -5183.309784639481
  episode_reward_min: -8170.0
  episodes_this_iter: 16
  episodes_total: 512
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.167245511085756
          entropy_coeff: 0.0
          kl: 0.018671876664712223
          policy_loss: -0.06738692778813582
          total_loss: 370392.9819892473
          vf_explained_var: -0.013276026261750088
          vf_loss: 370393.0206317204
        model: {}
    num_agent_steps_sampled: 128000
    num_agent_steps_tra

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,32,780.772,128000,-5183.31,-3255.27,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,32,780.772,128000,-5183.31,-3255.27,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,32,780.772,128000,-5183.31,-3255.27,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,32,780.772,128000,-5183.31,-3255.27,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2022-02-21_13-14-14
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -3070.0
  episode_reward_mean: -4882.114341176474
  episode_reward_min: -8170.0
  episodes_this_iter: 16
  episodes_total: 528
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.11512085827448
          entropy_coeff: 0.0
          kl: 0.019375852972740703
          policy_loss: -0.07726937372748169
          total_loss: 417645.83776146674
          vf_explained_var: 0.001652084691550142
          vf_loss: 417645.8840568296
        model: {}
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,33,803.616,132000,-4882.11,-3070.0,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,33,803.616,132000,-4882.11,-3070.0,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,33,803.616,132000,-4882.11,-3070.0,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,33,803.616,132000,-4882.11,-3070.0,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,33,803.616,132000,-4882.11,-3070.0,-8170.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2022-02-21_13-14-37
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -2570.0
  episode_reward_mean: -4619.286669127876
  episode_reward_min: -7970.0
  episodes_this_iter: 16
  episodes_total: 544
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.116520662461558
          entropy_coeff: 0.0
          kl: 0.019382654511650346
          policy_loss: -0.06418352298418521
          total_loss: 274148.38331023185
          vf_explained_var: 0.014445114840743362
          vf_loss: 274148.41807270667
        model: {}
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 1360

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,34,826.513,136000,-4619.29,-2570.0,-7970.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,34,826.513,136000,-4619.29,-2570.0,-7970.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,34,826.513,136000,-4619.29,-2570.0,-7970.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,34,826.513,136000,-4619.29,-2570.0,-7970.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2022-02-21_13-15-00
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -2570.0
  episode_reward_mean: -4316.082595838313
  episode_reward_min: -7370.0
  episodes_this_iter: 16
  episodes_total: 560
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.087857717596075
          entropy_coeff: 0.0
          kl: 0.018944997940314057
          policy_loss: -0.05970408189891567
          total_loss: 232513.069921875
          vf_explained_var: -0.01160979482435411
          vf_loss: 232513.100468855
        model: {}
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
 

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,35,849.183,140000,-4316.08,-2570.0,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,35,849.183,140000,-4316.08,-2570.0,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,35,849.183,140000,-4316.08,-2570.0,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,35,849.183,140000,-4316.08,-2570.0,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2022-02-21_13-15-23
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -2170.0
  episode_reward_mean: -3993.253608247977
  episode_reward_min: -7370.0
  episodes_this_iter: 16
  episodes_total: 576
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.039319160164044
          entropy_coeff: 0.0
          kl: 0.0186262217746091
          policy_loss: -0.06304734145962103
          total_loss: 257746.6939904654
          vf_explained_var: -0.0019735759304415797
          vf_loss: 257746.72851352487
        model: {}
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 14400

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,36,872.119,144000,-3993.25,-2170.0,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,36,872.119,144000,-3993.25,-2170.0,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,36,872.119,144000,-3993.25,-2170.0,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,36,872.119,144000,-3993.25,-2170.0,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,36,872.119,144000,-3993.25,-2170.0,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2022-02-21_13-15-46
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -1656.1096610936177
  episode_reward_mean: -3733.837260554247
  episode_reward_min: -7370.0
  episodes_this_iter: 16
  episodes_total: 592
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 4.02581003481342
          entropy_coeff: 0.0
          kl: 0.019840836887112877
          policy_loss: -0.07807593100994666
          total_loss: 230546.85893344675
          vf_explained_var: -0.006779592460201633
          vf_loss: 230546.90818212365
        model: {}
    num_agent_steps_sampled: 148000
    num_agent_steps_t

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,37,895.233,148000,-3733.84,-1656.11,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,37,895.233,148000,-3733.84,-1656.11,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,37,895.233,148000,-3733.84,-1656.11,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,37,895.233,148000,-3733.84,-1656.11,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2022-02-21_13-16-09
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -1656.1096610936177
  episode_reward_mean: -3474.661320950507
  episode_reward_min: -7370.0
  episodes_this_iter: 16
  episodes_total: 608
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 3.9713146486589985
          entropy_coeff: 0.0
          kl: 0.017801496178716743
          policy_loss: -0.061773852599654545
          total_loss: 170006.28035403017
          vf_explained_var: -0.00653368567907682
          vf_loss: 170006.3153454196
        model: {}
    num_agent_steps_sampled: 152000
    num_agent_steps_

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,38,918.203,152000,-3474.66,-1656.11,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,38,918.203,152000,-3474.66,-1656.11,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,38,918.203,152000,-3474.66,-1656.11,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,38,918.203,152000,-3474.66,-1656.11,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,38,918.203,152000,-3474.66,-1656.11,-7370.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2022-02-21_13-16-32
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -1656.1096610936177
  episode_reward_mean: -3229.7091812748254
  episode_reward_min: -6570.0
  episodes_this_iter: 16
  episodes_total: 624
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 3.884974431222485
          entropy_coeff: 0.0
          kl: 0.017402564408095553
          policy_loss: -0.045454139366585725
          total_loss: 133931.64290574598
          vf_explained_var: 0.004156598737162929
          vf_loss: 133931.661765478
        model: {}
    num_agent_steps_sampled: 156000
    num_agent_steps_t

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,39,941.183,156000,-3229.71,-1656.11,-6570.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,39,941.183,156000,-3229.71,-1656.11,-6570.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,39,941.183,156000,-3229.71,-1656.11,-6570.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,39,941.183,156000,-3229.71,-1656.11,-6570.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2022-02-21_13-16-55
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -1656.1096610936177
  episode_reward_mean: -3094.857607359815
  episode_reward_min: -6366.605406576823
  episodes_this_iter: 16
  episodes_total: 640
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 3.940491825021723
          entropy_coeff: 0.0
          kl: 0.01824063883819496
          policy_loss: -0.07910327306818417
          total_loss: 138028.01776503696
          vf_explained_var: 0.021095533973427227
          vf_loss: 138028.06911332326
        model: {}
    num_agent_steps_sampled: 160000
    num_age

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,40,963.889,160000,-3094.86,-1656.11,-6366.61,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,40,963.889,160000,-3094.86,-1656.11,-6366.61,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,40,963.889,160000,-3094.86,-1656.11,-6366.61,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,40,963.889,160000,-3094.86,-1656.11,-6366.61,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2022-02-21_13-17-17
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -1170.0
  episode_reward_mean: -2851.7288334282944
  episode_reward_min: -6366.605406576823
  episodes_this_iter: 16
  episodes_total: 656
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 3.8521944592075963
          entropy_coeff: 0.0
          kl: 0.01949135385093379
          policy_loss: -0.06780243541504587
          total_loss: 119653.62792128696
          vf_explained_var: 0.0097181829714006
          vf_loss: 119653.66579826109
        model: {}
    num_agent_steps_sampled: 164000
    num_agent_steps_tra

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,41,986.537,164000,-2851.73,-1170.0,-6366.61,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,41,986.537,164000,-2851.73,-1170.0,-6366.61,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,41,986.537,164000,-2851.73,-1170.0,-6366.61,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,41,986.537,164000,-2851.73,-1170.0,-6366.61,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,41,986.537,164000,-2851.73,-1170.0,-6366.61,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2022-02-21_13-17-40
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -1170.0
  episode_reward_mean: -2661.801038260536
  episode_reward_min: -5270.0
  episodes_this_iter: 16
  episodes_total: 672
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 3.8129759678276636
          entropy_coeff: 0.0
          kl: 0.017685519453149817
          policy_loss: -0.05331331927589671
          total_loss: 99627.3164655788
          vf_explained_var: -0.010741748925178282
          vf_loss: 99627.34320449008
        model: {}
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 16800

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,42,1009.44,168000,-2661.8,-1170.0,-5270.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,42,1009.44,168000,-2661.8,-1170.0,-5270.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,42,1009.44,168000,-2661.8,-1170.0,-5270.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,42,1009.44,168000,-2661.8,-1170.0,-5270.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2022-02-21_13-18-03
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -770.0
  episode_reward_mean: -2530.862680433596
  episode_reward_min: -5270.0
  episodes_this_iter: 16
  episodes_total: 688
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 3.810170554345654
          entropy_coeff: 0.0
          kl: 0.016905495324131957
          policy_loss: -0.06896477844916604
          total_loss: 106426.5530094926
          vf_explained_var: -0.014365260383134248
          vf_loss: 106426.59575090306
        model: {}
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 17200

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,43,1032.36,172000,-2530.86,-770.0,-5270.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,43,1032.36,172000,-2530.86,-770.0,-5270.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,43,1032.36,172000,-2530.86,-770.0,-5270.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,43,1032.36,172000,-2530.86,-770.0,-5270.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,43,1032.36,172000,-2530.86,-770.0,-5270.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2022-02-21_13-18-26
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -770.0
  episode_reward_mean: -2344.9550374249084
  episode_reward_min: -4870.0
  episodes_this_iter: 16
  episodes_total: 704
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 3.7494385109152844
          entropy_coeff: 0.0
          kl: 0.017605889155894235
          policy_loss: -0.07635856957102234
          total_loss: 82565.21683887769
          vf_explained_var: -0.05636068768398736
          vf_loss: 82565.26618678596
        model: {}
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 17600

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,44,1055.23,176000,-2344.96,-770.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,44,1055.23,176000,-2344.96,-770.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,44,1055.23,176000,-2344.96,-770.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,44,1055.23,176000,-2344.96,-770.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2022-02-21_13-18-49
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -770.0
  episode_reward_mean: -2211.584581675789
  episode_reward_min: -4870.0
  episodes_this_iter: 16
  episodes_total: 720
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 3.7393719668029455
          entropy_coeff: 0.0
          kl: 0.01755578422217452
          policy_loss: -0.06655801717211964
          total_loss: 72631.76002210392
          vf_explained_var: -0.06359713462091261
          vf_loss: 72631.80001023815
        model: {}
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,45,1078.25,180000,-2211.58,-770.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,45,1078.25,180000,-2211.58,-770.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,45,1078.25,180000,-2211.58,-770.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,45,1078.25,180000,-2211.58,-770.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2022-02-21_13-19-12
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -670.0
  episode_reward_mean: -1986.4572108506272
  episode_reward_min: -4870.0
  episodes_this_iter: 16
  episodes_total: 736
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 3.616032679875692
          entropy_coeff: 0.0
          kl: 0.01704902148122316
          policy_loss: -0.06663125819972246
          total_loss: 61003.37914566532
          vf_explained_var: -0.1265448999020361
          vf_loss: 61003.419806630125
        model: {}
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,46,1101.32,184000,-1986.46,-670.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,46,1101.32,184000,-1986.46,-670.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,46,1101.32,184000,-1986.46,-670.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,46,1101.32,184000,-1986.46,-670.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,46,1101.32,184000,-1986.46,-670.0,-4870.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2022-02-21_13-19-36
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -570.0
  episode_reward_mean: -1904.4475413728703
  episode_reward_min: -5670.0
  episodes_this_iter: 16
  episodes_total: 752
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.0005000000000000002
          entropy: 3.7997066231184107
          entropy_coeff: 0.0
          kl: 0.020060427254390906
          policy_loss: -0.09691279184433722
          total_loss: 134459.50224359985
          vf_explained_var: 0.038237263887159284
          vf_loss: 134459.56786741852
        model: {}
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,47,1124.47,188000,-1904.45,-570.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,47,1124.47,188000,-1904.45,-570.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,47,1124.47,188000,-1904.45,-570.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,47,1124.47,188000,-1904.45,-570.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2022-02-21_13-20-00
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -570.0
  episode_reward_mean: -1739.1054202801702
  episode_reward_min: -5670.0
  episodes_this_iter: 16
  episodes_total: 768
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 2.2781249999999993
          cur_lr: 0.0005000000000000002
          entropy: 3.6239966695026684
          entropy_coeff: 0.0
          kl: 0.013252617628235654
          policy_loss: -0.06691410076874559
          total_loss: 42317.28182097404
          vf_explained_var: -0.041805455633389055
          vf_loss: 42317.31862084173
        model: {}
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 1920

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,48,1148.84,192000,-1739.11,-570.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,48,1148.84,192000,-1739.11,-570.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,48,1148.84,192000,-1739.11,-570.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,48,1148.84,192000,-1739.11,-570.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,48,1148.84,192000,-1739.11,-570.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2022-02-21_13-20-23
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -370.0
  episode_reward_mean: -1542.0876370750077
  episode_reward_min: -5670.0
  episodes_this_iter: 16
  episodes_total: 784
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 2.2781249999999993
          cur_lr: 0.0005000000000000002
          entropy: 3.6064361018519246
          entropy_coeff: 0.0
          kl: 0.0130513339943941
          policy_loss: -0.058008816348569046
          total_loss: 31288.08293082945
          vf_explained_var: -0.10819801521557633
          vf_loss: 31288.111211698797
        model: {}
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 19600

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,49,1171.75,196000,-1542.09,-370.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,49,1171.75,196000,-1542.09,-370.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,49,1171.75,196000,-1542.09,-370.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,49,1171.75,196000,-1542.09,-370.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2022-02-21_13-20-46
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -270.0
  episode_reward_mean: -1448.4750908915023
  episode_reward_min: -5670.0
  episodes_this_iter: 16
  episodes_total: 800
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 2.2781249999999993
          cur_lr: 0.0005000000000000002
          entropy: 3.6443999434030183
          entropy_coeff: 0.0
          kl: 0.014616889607559021
          policy_loss: -0.07177391991880472
          total_loss: 80797.80567928847
          vf_explained_var: -0.005419729666043353
          vf_loss: 80797.84430574806
        model: {}
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 2000

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,50,1194.88,200000,-1448.48,-270.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,50,1194.88,200000,-1448.48,-270.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,50,1194.88,200000,-1448.48,-270.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,50,1194.88,200000,-1448.48,-270.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,50,1194.88,200000,-1448.48,-270.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2022-02-21_13-21-09
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -170.0
  episode_reward_mean: -1243.3771044883756
  episode_reward_min: -5670.0
  episodes_this_iter: 16
  episodes_total: 816
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 2.2781249999999993
          cur_lr: 0.0005000000000000002
          entropy: 3.503623661687297
          entropy_coeff: 0.0
          kl: 0.01199739934976736
          policy_loss: -0.05623739543671329
          total_loss: 26757.153984480006
          vf_explained_var: -0.026349396346717752
          vf_loss: 26757.18283875168
        model: {}
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 20400

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,51,1217.8,204000,-1243.38,-170.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,51,1217.8,204000,-1243.38,-170.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,51,1217.8,204000,-1243.38,-170.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,51,1217.8,204000,-1243.38,-170.0,-5670.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2022-02-21_13-21-32
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -70.0
  episode_reward_mean: -1179.396824999575
  episode_reward_min: -6770.0
  episodes_this_iter: 16
  episodes_total: 832
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 2.2781249999999993
          cur_lr: 0.0005000000000000002
          entropy: 3.488208678204526
          entropy_coeff: 0.0
          kl: 0.009861059703060834
          policy_loss: -0.06250171609403145
          total_loss: 71633.59497286889
          vf_explained_var: -0.07708044507170236
          vf_loss: 71633.6340372065
        model: {}
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
  

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,52,1240.73,208000,-1179.4,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,52,1240.73,208000,-1179.4,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,52,1240.73,208000,-1179.4,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,52,1240.73,208000,-1179.4,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2022-02-21_13-21-55
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -70.0
  episode_reward_mean: -1015.4677590350772
  episode_reward_min: -6770.0
  episodes_this_iter: 16
  episodes_total: 848
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 2.2781249999999993
          cur_lr: 0.0005000000000000002
          entropy: 3.5699142966219175
          entropy_coeff: 0.0
          kl: 0.012666638467773125
          policy_loss: -0.07231560580592643
          total_loss: 42826.161067380184
          vf_explained_var: 0.04839852452278137
          vf_loss: 42826.20504255397
        model: {}
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,53,1263.68,212000,-1015.47,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,53,1263.68,212000,-1015.47,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,53,1263.68,212000,-1015.47,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,53,1263.68,212000,-1015.47,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,53,1263.68,212000,-1015.47,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2022-02-21_13-22-18
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -70.0
  episode_reward_mean: -876.1977942070884
  episode_reward_min: -6770.0
  episodes_this_iter: 16
  episodes_total: 864
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 2.2781249999999993
          cur_lr: 0.0005000000000000002
          entropy: 3.386086070153021
          entropy_coeff: 0.0
          kl: 0.011480457487043743
          policy_loss: -0.04722530266034707
          total_loss: 7961.508135330037
          vf_explained_var: -0.007291079016141994
          vf_loss: 7961.52920656922
        model: {}
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
 

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,54,1286.51,216000,-876.198,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,54,1286.51,216000,-876.198,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,54,1286.51,216000,-876.198,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,54,1286.51,216000,-876.198,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2022-02-21_13-22-41
  done: false
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: -70.0
  episode_reward_mean: -754.8994093737446
  episode_reward_min: -6770.0
  episodes_this_iter: 16
  episodes_total: 880
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 2.2781249999999993
          cur_lr: 0.0005000000000000002
          entropy: 3.408102039624286
          entropy_coeff: 0.0
          kl: 0.011961927907587117
          policy_loss: -0.05152815890007763
          total_loss: 7568.213286894112
          vf_explained_var: -0.07576311237068586
          vf_loss: 7568.237588205645
        model: {}
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
 

Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,55,1309.59,220000,-754.899,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,55,1309.59,220000,-754.899,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,55,1309.59,220000,-754.899,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,55,1309.59,220000,-754.899,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00002,RUNNING,127.0.0.1:25681,0.0005,55,1309.59,220000,-754.899,-70.0,-6770.0,250
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250


Result for PPOTrainer_Multi_passanger_env_29773_00002:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2022-02-21_13-23-04
  done: true
  episode_len_mean: 250.0
  episode_media: {}
  episode_reward_max: 30.0
  episode_reward_mean: -666.2275985470728
  episode_reward_min: -6770.0
  episodes_this_iter: 16
  episodes_total: 896
  experiment_id: 3c9a11d04f3447e996eb7e80c237bdf2
  hostname: Wyames-MBP.attlocal.net
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 2.2781249999999993
          cur_lr: 0.0005000000000000002
          entropy: 3.460679397531735
          entropy_coeff: 0.0
          kl: 0.01127065493350863
          policy_loss: -0.06947654788131996
          total_loss: 38959.793184801325
          vf_explained_var: -0.22896251466966444
          vf_loss: 38959.837273841775
        model: {}
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
  



Trial name,status,loc,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_Multi_passanger_env_29773_00000,TERMINATED,127.0.0.1:25399,0.005,18,482.663,72000,-961.03,30.0,-2170.0,250
PPOTrainer_Multi_passanger_env_29773_00001,TERMINATED,127.0.0.1:25400,0.0001,40,1028.81,160000,-744.458,54.8084,-7156.57,250
PPOTrainer_Multi_passanger_env_29773_00002,TERMINATED,127.0.0.1:25681,0.0005,56,1332.87,224000,-666.228,30.0,-6770.0,250


2022-02-21 13:23:05,473	INFO tune.py:636 -- Total run time: 1845.11 seconds (1844.65 seconds for the tuning loop).


In [17]:
# list of lists: one list per checkpoint; each checkpoint list contains
# 1st the path, 2nd the metric value
checkpoints = analysis.get_trial_checkpoints_paths(
    trial=analysis.get_best_trial("episode_reward_mean", mode='max'),
    metric="episode_reward_mean")


In [18]:
agent = ppo.PPOTrainer(
    config={'env_config': env_config, 'framework': 'torch'}, env=chosen_env)
agent.restore(checkpoints[0][0])


2022-02-21 13:23:05,830	INFO ppo.py:249 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2022-02-21 13:23:05,831	INFO trainer.py:790 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2022-02-21 13:23:12,563	INFO trainable.py:472 -- Restored on 127.0.0.1 from checkpoint: /Users/wyamebenslimane/ray_results/PPOTrainer_2022-02-21_12-52-20/PPOTrainer_Multi_passanger_env_29773_00002_2_lr=0.0005_2022-02-21_12-52-22/checkpoint_000056/checkpoint-56
2022-02-21 13:23:12,578	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 56, '_timesteps_total': 224000, '_time_total': 1332.8652975559235, '_episodes_total': 896}


In [20]:
env = env_multi_creator(env_config)
to_print = True
num_episodes = 10

env.reset()
total_reward = []
box_flag = isinstance(env.observation_space, gym.spaces.Box)
for episode in range(num_episodes):
    env.reset()
    state = env.reset()
    state_0 = state.copy()
    done = False
    ep_reward = 0
    step = 0
    if to_print:
        print("****************************************************")
        print("EPISODE ", episode)
        # print('Initial state', state_0)

    while not done:
        # print(state)
        action = agent.compute_action(state)
        new_state, reward, done, info = env.step(action)

        ep_reward += reward
        step += 1

        if done:
            total_reward.append(ep_reward)
            if to_print:
                # Here, we decide to only print the last state (to see if our agent is on the goal or fall into an hole)
                # num_transported= list(state_0[1:]).count(1) - list(new_state[1:]).count(1)
                print('Initial state : ', state_0,
                      ' Final state : ', new_state)

                # We print the number of step it took.
                print(
                    f"Number of steps {step}, customers transported are {env.customers}, the total reward is {ep_reward}")

            break
        state = new_state
        # state = tuple(new_state) if box_flag else new_state

if to_print:
    print('Mean score: %.3f of %i games!' %
          (np.mean(total_reward), num_episodes))
# return np.mean(total_reward)




****************************************************
EPISODE  0
Initial state :  OrderedDict([('position', array([54])), ('state', array([2., 2., 2.]))])  Final state :  OrderedDict([('position', array([54])), ('state', array([2., 2., 2.]))])
Number of steps 250, customers transported are 1, the total reward is -261.886700608727
****************************************************
EPISODE  1
Initial state :  OrderedDict([('position', array([8])), ('state', array([2., 2., 2.]))])  Final state :  OrderedDict([('position', array([8])), ('state', array([2., 2., 2.]))])
Number of steps 250, customers transported are 0, the total reward is 30
****************************************************
EPISODE  2
Initial state :  OrderedDict([('position', array([48])), ('state', array([2., 2., 2.]))])  Final state :  OrderedDict([('position', array([48])), ('state', array([2., 2., 2.]))])
Number of steps 250, customers transported are 0, the total reward is 30
***************************************