In [10]:
import pandas as pd
import geopandas as geopd

import numpy as np
import random
from sklearn.model_selection import train_test_split

import gym
from gym.wrappers import TimeLimit
# import time

import warnings
warnings.filterwarnings('ignore')
import itertools
import matplotlib.pyplot as plt

# from gym.spaces import Dict,Box,Discrete

In [11]:
import ray
from ray import tune
from ray.rllib.agents import dqn
from ray.tune.registry import register_env
from typing import Dict
# import ray
# from ray import tune
from ray.rllib.env import BaseEnv
from ray.rllib.policy import Policy
# from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.evaluation import MultiAgentEpisode, RolloutWorker
from ray.rllib.agents.callbacks import DefaultCallbacks

In [12]:
## Env params:

## import data
nb_of_data = 'Manhattan' 

data  = pd.read_csv(f'./Data/demand_{nb_of_data}.csv')
distance_data = pd.read_csv(f'./Data/distance_info_{nb_of_data}.csv')
max_episode_steps = 200
max_client = 100

env_config = {'demand_data' : data,
              'distance_data':distance_data,
              'client_limit':max_client,
              'customers_per_taxi':2,
              'max_timestep': max_episode_steps,
             'number_taxis':1}
del data, distance_data
dict_results = {}

env_name = f"{env_config['number_taxis']}_TaxiAgent_{env_config['customers_per_taxi']}_passangers_{nb_of_data}"
print(env_config['demand_data'].Demand.sum())

126646


## Env

In [34]:
class Multi_passanger_taxi(gym.Env):
    def __init__(self,env_config):
        ## Get data from input
        self.data = env_config['demand_data'].sort_values(by=['Demand'],ascending=False,ignore_index=True)
        self.distance_data = env_config['distance_data']
        # self.max_timestep =  env_config['max_timestep']
        self.n = env_config['customers_per_taxi'] # max number of customer per taxi

        self.locations =  np.unique(self.distance_data.PULocationID.values)
        self.nb_locations = len(self.locations)

        self.locations = dict(zip(range(self.nb_locations),self.locations))
        self.location_to_index = dict((v, k) for k, v in self.locations.items())

        self.customers=0
        self.custumer_count = self.data.Demand.sum() 
        self.customers_limit = min(self.custumer_count,env_config['client_limit'])
        self.current_client_count=0
        
        self.agents =[f'taxi_{i}' for i in range(env_config['number_taxis'])]
        
        self.action_space = self.get_action_space()

        # Define Observation space 
        self.observation_space = self.get_obs_space()
        # ## Initialise the space  
        self.reset()    
        
    def check_empty_positions(self,state_obs):
        empty_position = np.where(state_obs==-1)
        return min(empty_position,default=None)

        
    def get_distane(self,PU_location,DO_location):
            distance = self.distance_data[(self.distance_data.PULocationID==PU_location) & 
                                          (self.distance_data.DOLocationID==DO_location)]['distance'].values
            return distance[0]
        

    def get_action_space(self):
        action_dict = {'move':gym.spaces.Discrete(self.nb_locations),
                                                'pickup' : gym.spaces.Discrete(1)}
        
        return gym.spaces.Dict({agent:gym.spaces.Dict(action_dict) for agent in self.agents})
    
    
    def get_obs_space(self):
        single_position_obs = {'position': gym.spaces.Box(low = np.array([0]),high=np.array([self.nb_locations-1]),dtype=int),
                      'state':gym.spaces.Box(low = -1,high=self.nb_locations-1,shape=(self.n,),dtype=int),}
        return gym.spaces.Dict({agent:gym.spaces.Dict(single_position_obs) for agent in self.agents})
    
    
    
    def pick_up_reward(self,observation):
        state_obs = observation['state']
        empty_location = self.check_empty_positions(state_obs)
        # print('state', state_obs, 'empty location', empty_location)
        if empty_location is None or len(empty_location)==0:
            return 0, observation
        else:
            taxi_location = self.locations[observation['position'][0]]
            if taxi_location in self.demand_dict.keys(): ## Current location has a client:                   
                destination_location = self.demand_dict[taxi_location].pop(0)
                observation['state'][empty_location[0]] = self.location_to_index[destination_location]
                if self.demand_dict[taxi_location]==[]:
                    self.demand_dict.pop(taxi_location)
                return 10,observation
        return 0,observation
            
            
            
    def move_reward(self,destination_id,observation):
        taxi_location = self.locations[observation['position'][0]]
        destination = self.locations[destination_id]
        if destination_id in observation['state']:
            reward = 0
            destination_indexes, = np.where(observation['state']==destination_id)
            for destination_index in destination_indexes:
                observation['state'][destination_index]=-1
                self.customers+=1
                reward += 100
            return reward, observation
        else:
            return -10,observation
            # if self.check_empty_positions(observation['state']) is None:
            #     return -self.get_distane(taxi_location,destination)*10,observation
            # else: 
            #     return -self.get_distane(taxi_location,destination)*30,observation

            
    def single_step(self, action,observation):
        pu_reward ,move_reward =0,0
        # observation = self.state['taxi_0']
        # action = action['taxi_0']
        # if action['pickup']: ## If action is pick up client:
        pu_reward, observation = self.pick_up_reward(observation)   
        if observation['position'][0]==action['move']: #and action['pickup']==0:
            move_reward =-10
        else:
            move_reward ,observation = self.move_reward(action['move'],observation)
        # print(action, 'pick_up rew',pu_reward, 'move rew',move_reward, 'obs', observation)
        # Return step information
        return observation, pu_reward+move_reward
        


    def step(self, action):
        done = False
        reward =0
        for agent in self.agents:
            taxi_observation = self.state[agent]
            taxi_action = action[agent]
            taxi_observation, agent_reward = self.single_step(taxi_action,taxi_observation)
            self.state[agent] = taxi_observation
            reward += agent_reward
            
        self.time_step +=1

        if self.customers == self.customers_limit or len(self.demand_dict)==0:
            done=True

        # Set placeholder for info
        info = {'customers_dropped': self.customers,
               'done': done,
               'reward': reward}

        # Return step information
        return self.state, reward/len(self.agents), done, info

    def render(self):
        # Implement viz
        pass
    
    def reset(self):
        self.time_step = 0 
        self.customers=0
        # self.taxi_path =[]
        self.state = {agent:{'position':np.random.randint([0],[self.nb_locations]),
                            'state': -np.ones(self.n)} for agent in self.agents}
        self.demand_dict = {pu :[] for pu in self.data.PU_LocationID}
        for index in range(len(self.data)):
            row_index = self.data.loc[index]
            self.demand_dict[row_index.PU_LocationID]+=[row_index.DO_LocationID for _ in range(row_index.Demand)]

        return self.state



## Wrapper

In [35]:
class Obs_Wrapper(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)
        assert isinstance(env.observation_space, gym.spaces.Dict), \
            "Should only be used to wrap Discrete envs."
        self.old_observation_space = self.observation_space
        self.agent = self.old_observation_space.spaces.keys()
        
        self.observation_space, self.shape = self.create_space()
        
    def create_space(self):
        # nb_agent = len(self.agent)
        dict_value1 = list(self.old_observation_space.spaces.values())[0].spaces.values()
        high,low,shape =[],[],0
        for boxes in dict_value1:
            b_high,b_low, b_shape = boxes.high, boxes.low, boxes.shape
            high= np.append(high,b_high)
            low = np.append(low,b_low)
            shape+=b_shape[0]
        high,low = np.array([high for _ in self.agent]), np.array([low for _ in self.agent])
        shape = (len(self.agent),shape)
        return gym.spaces.Box(high=high,low=low,dtype=int), shape
        
    def observation(self, obs):
        new_obs = np.zeros(self.shape)
        for id_agent, agent in enumerate(self.agent):
            new_obs[id_agent][0] = obs[agent]['position'] 
            new_obs[id_agent][1:] = obs[agent]['state'] 
        return new_obs

In [36]:
class Action_Wrapper(gym.ActionWrapper):
    def __init__(self, env):
        super().__init__(env)
        assert isinstance(env.action_space, gym.spaces.Dict), \
            "Should only be used to wrap Discrete envs."
        self.old_action_space = self.action_space
        self.agent = list(self.old_action_space.spaces.keys())
        self.action_space = self.create_space()
        
        
    def create_space(self):
        nb_agent = len(self.agent)
        dict_value1 = list(self.old_action_space.spaces.values())[0].spaces.values()
        shape = np.array([boxes.n for boxes in dict_value1])
        # print('shape',shape)
        action_agent = [[i,j] for i in range(shape[0]) for j in range(shape[1])]
        # print('action',action_agent)
        str_exe = '[('+''.join([f'i{i},' for i in range(nb_agent)])+') '+' '.join([f'for i{i} in {action_agent}' for i in range(nb_agent)])+']'
        # print(str_exe)
        self.all_action_combo = eval(str_exe)
        # print(self.all_action_combo)
        return gym.spaces.Discrete(len(self.all_action_combo))
        
    def action(self, action):
        # print(action)
        # print(self.agent)
        # print(self.all_action_combo)
        action = self.all_action_combo[action]
        # print(action)
        new_action = {}
        for index, elem in enumerate(action):
            new_action[self.agent[index]]={'move' : elem[0],'pickup': elem[1]}
        return new_action

## Callbacks

In [37]:
class MyCallbacks(DefaultCallbacks):
    def on_episode_start(self, worker: RolloutWorker, base_env: BaseEnv,
                         policies: Dict[str, Policy],
                         episode: MultiAgentEpisode, **kwargs):
        episode.custom_metrics["nb_customers_dropped"] = []
        # episode.user_data["nb_customers_picked"] = []

    def on_episode_end(self, worker: RolloutWorker, base_env: BaseEnv,
                       policies: Dict[str, Policy], episode: MultiAgentEpisode,
                       **kwargs):
        info_dict = episode.last_info_for()
        episode.custom_metrics["nb_customers_dropped"].append(info_dict['customers_dropped'])
        
    def on_train_result(self, trainer, result: dict, **kwargs):
        result["callback_ok"] = True



## env register

In [38]:
# dqn.APEX_DEFAULT_CONFIG


# 'gamma': 0.99,
# 'lr': 0.0005,
# 'clip_rewards': None,
# 'lr_schedule': None,
# 'adam_epsilon': 1e-08,

# 'learning_starts': 50000,

# dqn.DEFAULT_CONFIG
# 'gamma': 0.99,
# 'learning_starts': 1000
#  'lr': 0.0005,

In [39]:
def env_multi_creator(env_config):
    env=TimeLimit(Action_Wrapper(Obs_Wrapper(Multi_passanger_taxi(env_config))),env_config['max_timestep'])# return an env instance
    # env=TimeLimit(Multi_passanger_taxi(env_config),env_config['max_timestep'])# return an env instance

    env.seed(1042)
    return env


In [40]:
register_env(env_name, env_multi_creator)

## test 

In [41]:
config = {
    "env": env_name,
    'env_config': env_config,
    'framework': 'torch',
    'callbacks':MyCallbacks,
    # 'gpu':None,
    "lr": 0.0005,
     # "num_gpus_per_worker": 0,
    'gamma': 0.9,
    'seed':1042
}
# trainer_config['env_config']=env_config
# trainer_config['framework']='torch'
client_dropped_criteria = 10*env_config['number_taxis']*env_config['customers_per_taxi']

stop_criteria = {'agent_timesteps_total':100000}




In [None]:
# tune.run() allows setting a custom log directory (other than ``~/ray-results``)
# and automatically saving the trained agent
import time
ray.shutdown()
ray.init()
register_env(env_name, env_multi_creator)
t1 = time.time()
analysis = ray.tune.run(
    'DQN',
    config=config,
    # local_dir=log_dir,
    stop=stop_criteria,
    checkpoint_at_end=True)

time.time()-t1

Trial name,status,loc
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,PENDING,


[2m[36m(DQNTrainer pid=61530)[0m 2022-04-07 11:39:04,663	INFO simple_q.py:153 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQNTrainer pid=61530)[0m 2022-04-07 11:39:04,663	INFO trainer.py:790 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530


Trial name,status,loc
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530


Trial name,status,loc
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 1000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 16
    nb_customers_dropped_mean: 8.4
    nb_customers_dropped_min: 4
  date: 2022-04-07_11-39-19
  done: false
  episode_len_mean: 200.0
  episode_media: {}
  episode_reward_max: -140.0
  episode_reward_mean: -1014.0
  episode_reward_min: -1520.0
  episodes_this_iter: 5
  episodes_total: 5
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 1000
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 6.222083568572998
          max_q: 0.009224439971148968
          mean_q: -0.0033648316748440266
          min_q: -0.01613672822713852
        mean_td_error: -10.005289077758789
        model: {}
        td_error:
        - 9.990676879882812
        - 9.9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,1,11.0136,1000,-1014,-140,-1520,200


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,1,11.0136,1000,-1014,-140,-1520,200


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,1,11.0136,1000,-1014,-140,-1520,200


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 2000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 16
    nb_customers_dropped_mean: 7.6
    nb_customers_dropped_min: 4
  date: 2022-04-07_11-39-32
  done: false
  episode_len_mean: 200.0
  episode_media: {}
  episode_reward_max: -140.0
  episode_reward_mean: -1106.0
  episode_reward_min: -1520.0
  episodes_this_iter: 5
  episodes_total: 10
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 1504
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 1.8506908416748047
          max_q: 206.91709899902344
          mean_q: -1.5198009014129639
          min_q: -15.519676208496094
        mean_td_error: -38.699378967285156
        model: {}
        td_error:
        - 7.750701904296875
        - -1.1508

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,2,24.612,2000,-1106,-140,-1520,200


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,2,24.612,2000,-1106,-140,-1520,200


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,2,24.612,2000,-1106,-140,-1520,200


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 3000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 45
    nb_customers_dropped_mean: 11.866666666666667
    nb_customers_dropped_min: 4
  date: 2022-04-07_11-39-45
  done: false
  episode_len_mean: 200.0
  episode_media: {}
  episode_reward_max: 3210.0
  episode_reward_mean: -614.0
  episode_reward_min: -1520.0
  episodes_this_iter: 5
  episodes_total: 15
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 2512
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 1.9404913187026978
          max_q: 91.17200469970703
          mean_q: -1.523717999458313
          min_q: -17.597400665283203
        mean_td_error: -67.86106872558594
        model: {}
        td_error:
        - 14.914793014526367
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,3,37.714,3000,-614,3210,-1520,200


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,3,37.714,3000,-614,3210,-1520,200


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 4000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 121
    nb_customers_dropped_mean: 22.15
    nb_customers_dropped_min: 4
  date: 2022-04-07_11-39-59
  done: false
  episode_len_mean: 200.0
  episode_media: {}
  episode_reward_max: 12080.0
  episode_reward_mean: 577.0
  episode_reward_min: -1520.0
  episodes_this_iter: 5
  episodes_total: 20
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 3520
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.5984104871749878
          max_q: 58.87080764770508
          mean_q: 12.469919204711914
          min_q: -12.859285354614258
        mean_td_error: -54.27497100830078
        model: {}
        td_error:
        - 8.777766227722168
        - -0.12196

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,4,51.7131,4000,577,12080,-1520,200


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,4,51.7131,4000,577,12080,-1520,200


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,4,51.7131,4000,577,12080,-1520,200


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 5000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 129
    nb_customers_dropped_mean: 23.64
    nb_customers_dropped_min: 2
  date: 2022-04-07_11-40-14
  done: false
  episode_len_mean: 200.0
  episode_media: {}
  episode_reward_max: 12960.0
  episode_reward_mean: 750.8
  episode_reward_min: -1750.0
  episodes_this_iter: 5
  episodes_total: 25
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 4528
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.3967897593975067
          max_q: 182.1224822998047
          mean_q: 14.864895820617676
          min_q: -10.986762046813965
        mean_td_error: -54.60300827026367
        model: {}
        td_error:
        - -3.580709457397461
        - -158.07

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,5,66.2523,5000,750.8,12960,-1750,200


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,5,66.2523,5000,750.8,12960,-1750,200


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,5,66.2523,5000,750.8,12960,-1750,200


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 6000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 28.466666666666665
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-40-28
  done: false
  episode_len_mean: 199.83333333333334
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1313.3333333333333
  episode_reward_min: -1980.0
  episodes_this_iter: 5
  episodes_total: 30
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 5536
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.35734403133392334
          max_q: 99.52172088623047
          mean_q: 12.308758735656738
          min_q: -13.610782623291016
        mean_td_error: -52.90796661376953
        model: {}
        td_error:
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,6,80.4512,6000,1313.33,13800,-1980,199.833


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,6,80.4512,6000,1313.33,13800,-1980,199.833


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 7000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 27.97142857142857
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-40-41
  done: false
  episode_len_mean: 197.88571428571427
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1274.857142857143
  episode_reward_min: -1980.0
  episodes_this_iter: 5
  episodes_total: 35
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 6544
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.4118599593639374
          max_q: 166.30905151367188
          mean_q: 26.563695907592773
          min_q: -15.366924285888672
        mean_td_error: -35.90186309814453
        model: {}
        td_error:
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,7,93.348,7000,1274.86,13800,-1980,197.886


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,7,93.348,7000,1274.86,13800,-1980,197.886


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,7,93.348,7000,1274.86,13800,-1980,197.886


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 8000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 26.775
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-40-54
  done: false
  episode_len_mean: 198.15
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1138.0
  episode_reward_min: -1980.0
  episodes_this_iter: 5
  episodes_total: 40
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 7552
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.8148234486579895
          max_q: 145.43634033203125
          mean_q: 48.37471389770508
          min_q: -6.028237342834473
        mean_td_error: -4.7987799644470215
        model: {}
        td_error:
        - -1.3522071838378906
        - -0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,8,106.252,8000,1138,13800,-1980,198.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,8,106.252,8000,1138,13800,-1980,198.15


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 9000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 24.733333333333334
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-41-07
  done: false
  episode_len_mean: 198.35555555555555
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 902.0
  episode_reward_min: -1980.0
  episodes_this_iter: 5
  episodes_total: 45
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 8560
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.23955732583999634
          max_q: 115.96517944335938
          mean_q: 23.80704116821289
          min_q: -1.935628890991211
        mean_td_error: -16.90074920654297
        model: {}
        td_error:
        - 1.82756996

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,9,119.152,9000,902,13800,-1980,198.356


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,9,119.152,9000,902,13800,-1980,198.356


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,9,119.152,9000,902,13800,-1980,198.356


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 10000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 28.470588235294116
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-41-22
  done: false
  episode_len_mean: 194.68627450980392
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1386.862745098039
  episode_reward_min: -1980.0
  episodes_this_iter: 6
  episodes_total: 51
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 9568
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.9873400330543518
          max_q: 144.72799682617188
          mean_q: 45.492008209228516
          min_q: -0.2462301254272461
        mean_td_error: -7.242922782897949
        model: {}
        td_error:
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,10,133.584,10000,1386.86,13800,-1980,194.686


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,10,133.584,10000,1386.86,13800,-1980,194.686


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 11000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 27.321428571428573
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-41-35
  done: false
  episode_len_mean: 195.16071428571428
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1246.4285714285713
  episode_reward_min: -1980.0
  episodes_this_iter: 5
  episodes_total: 56
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 10576
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.4257601201534271
          max_q: 182.59048461914062
          mean_q: 42.16636276245117
          min_q: -2.8829538822174072
        mean_td_error: -3.2120161056518555
        model: {}
        td_error:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,11,146.436,11000,1246.43,13800,-1980,195.161


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,11,146.436,11000,1246.43,13800,-1980,195.161


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,11,146.436,11000,1246.43,13800,-1980,195.161


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 12000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 27.9672131147541
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-41-48
  done: false
  episode_len_mean: 195.55737704918033
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1317.049180327869
  episode_reward_min: -1980.0
  episodes_this_iter: 5
  episodes_total: 61
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 11584
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.734185516834259
          max_q: 189.1101531982422
          mean_q: 34.01732635498047
          min_q: 4.193914413452148
        mean_td_error: -37.32621383666992
        model: {}
        td_error:
        - -7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,12,159.371,12000,1317.05,13800,-1980,195.557


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,12,159.371,12000,1317.05,13800,-1980,195.557


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 13000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 27.363636363636363
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-42-01
  done: false
  episode_len_mean: 195.8939393939394
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1242.5757575757575
  episode_reward_min: -1980.0
  episodes_this_iter: 5
  episodes_total: 66
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 12592
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.4140828251838684
          max_q: 80.4045639038086
          mean_q: 19.39985466003418
          min_q: -9.372064590454102
        mean_td_error: -45.41444396972656
        model: {}
        td_error:
        -

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,13,172.304,13000,1242.58,13800,-1980,195.894


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,13,172.304,13000,1242.58,13800,-1980,195.894


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 14000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 26.267605633802816
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-42-14
  done: false
  episode_len_mean: 196.18309859154928
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1114.3661971830986
  episode_reward_min: -1980.0
  episodes_this_iter: 5
  episodes_total: 71
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 13600
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.5956582427024841
          max_q: 186.2776336669922
          mean_q: 37.728271484375
          min_q: -13.638498306274414
        mean_td_error: -48.361717224121094
        model: {}
        td_error:
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,14,185.215,14000,1114.37,13800,-1980,196.183


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,14,185.215,14000,1114.37,13800,-1980,196.183


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,14,185.215,14000,1114.37,13800,-1980,196.183


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 15000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 27.026315789473685
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-42-27
  done: false
  episode_len_mean: 195.1184210526316
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1221.7105263157894
  episode_reward_min: -1990.0
  episodes_this_iter: 5
  episodes_total: 76
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 14608
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.4684898257255554
          max_q: 122.43592834472656
          mean_q: 31.81197738647461
          min_q: -14.83444881439209
        mean_td_error: -36.198753356933594
        model: {}
        td_error:
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,15,198.109,15000,1221.71,13800,-1990,195.118


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,15,198.109,15000,1221.71,13800,-1990,195.118


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,15,198.109,15000,1221.71,13800,-1990,195.118


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 16000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 27.70731707317073
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-42-41
  done: false
  episode_len_mean: 194.3658536585366
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1310.121951219512
  episode_reward_min: -1990.0
  episodes_this_iter: 6
  episodes_total: 82
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 15616
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.6391549110412598
          max_q: 227.4032440185547
          mean_q: 30.38149642944336
          min_q: -10.831125259399414
        mean_td_error: -14.823646545410156
        model: {}
        td_error:
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,16,212.915,16000,1310.12,13800,-1990,194.366


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,16,212.915,16000,1310.12,13800,-1990,194.366


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 17000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 26.114942528735632
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-42-54
  done: false
  episode_len_mean: 194.68965517241378
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1121.0344827586207
  episode_reward_min: -1990.0
  episodes_this_iter: 5
  episodes_total: 87
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 16624
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 1.0949009656906128
          max_q: 247.28749084472656
          mean_q: 84.60653686523438
          min_q: 6.068482875823975
        mean_td_error: -8.310555458068848
        model: {}
        td_error:
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,17,225.852,17000,1121.03,13800,-1990,194.69


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,17,225.852,17000,1121.03,13800,-1990,194.69


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,17,225.852,17000,1121.03,13800,-1990,194.69


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,17,225.852,17000,1121.03,13800,-1990,194.69


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 18000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 26.152173913043477
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-50-12
  done: false
  episode_len_mean: 194.45652173913044
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1124.7826086956522
  episode_reward_min: -1990.0
  episodes_this_iter: 5
  episodes_total: 92
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 17632
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.48515263199806213
          max_q: 207.8985595703125
          mean_q: 48.045658111572266
          min_q: -8.202293395996094
        mean_td_error: -27.329130172729492
        model: {}
        td_error:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,18,663.557,18000,1124.78,13800,-1990,194.457


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,18,663.557,18000,1124.78,13800,-1990,194.457


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,18,663.557,18000,1124.78,13800,-1990,194.457


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 19000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 27.6734693877551
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-50-29
  done: false
  episode_len_mean: 192.78571428571428
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1313.3673469387754
  episode_reward_min: -1990.0
  episodes_this_iter: 6
  episodes_total: 98
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 18640
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 1.4659738540649414
          max_q: 143.16336059570312
          mean_q: 61.40558624267578
          min_q: 7.846523284912109
        mean_td_error: -15.454353332519531
        model: {}
        td_error:
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,19,680.043,19000,1313.37,13800,-1990,192.786


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,19,680.043,19000,1313.37,13800,-1990,192.786


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 20000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 27.7
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-50-43
  done: false
  episode_len_mean: 192.93
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1317.5
  episode_reward_min: -1990.0
  episodes_this_iter: 5
  episodes_total: 103
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 19648
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 1.281225323677063
          max_q: 224.15628051757812
          mean_q: 68.07369995117188
          min_q: 9.57990550994873
        mean_td_error: -16.338193893432617
        model: {}
        td_error:
        - -1.2235450744628906
        - 0.934

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,20,693.85,20000,1317.5,13800,-1990,192.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,20,693.85,20000,1317.5,13800,-1990,192.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,20,693.85,20000,1317.5,13800,-1990,192.93


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 21000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 30.87
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-50-57
  done: false
  episode_len_mean: 191.93
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1705.2
  episode_reward_min: -1990.0
  episodes_this_iter: 6
  episodes_total: 109
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 20656
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.5147815346717834
          max_q: 163.6990966796875
          mean_q: 55.350162506103516
          min_q: -11.195943832397461
        mean_td_error: -33.86310577392578
        model: {}
        td_error:
        - -2.5204429626464844
        - 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,21,708.456,21000,1705.2,13800,-1990,191.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,21,708.456,21000,1705.2,13800,-1990,191.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,21,708.456,21000,1705.2,13800,-1990,191.93


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 22000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 32.68
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-51-10
  done: false
  episode_len_mean: 190.44
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1936.4
  episode_reward_min: -1990.0
  episodes_this_iter: 5
  episodes_total: 114
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 21664
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.8052248358726501
          max_q: 252.88482666015625
          mean_q: 58.98331832885742
          min_q: 10.48696517944336
        mean_td_error: -58.5485725402832
        model: {}
        td_error:
        - -196.0386962890625
        - -17.4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,22,721.424,22000,1936.4,13800,-1990,190.44


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,22,721.424,22000,1936.4,13800,-1990,190.44


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 23000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 31.71
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-51-25
  done: false
  episode_len_mean: 189.44
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 1835.3
  episode_reward_min: -1990.0
  episodes_this_iter: 6
  episodes_total: 120
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 22672
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.907010555267334
          max_q: 191.89053344726562
          mean_q: 64.9861068725586
          min_q: -7.820944309234619
        mean_td_error: -19.322471618652344
        model: {}
        td_error:
        - 1.7884130477905273
        - -24.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,23,735.866,23000,1835.3,13800,-1990,189.44


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,23,735.866,23000,1835.3,13800,-1990,189.44


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,23,735.866,23000,1835.3,13800,-1990,189.44


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 24000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 135
    nb_customers_dropped_mean: 33.59
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-51-39
  done: false
  episode_len_mean: 186.96
  episode_media: {}
  episode_reward_max: 13800.0
  episode_reward_mean: 2076.5
  episode_reward_min: -1990.0
  episodes_this_iter: 6
  episodes_total: 126
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 23680
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.5784987807273865
          max_q: 275.0096435546875
          mean_q: 79.01899719238281
          min_q: 31.916183471679688
        mean_td_error: -7.086094379425049
        model: {}
        td_error:
        - -1.7422561645507812
        - 248

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,24,750.306,24000,2076.5,13800,-1990,186.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,24,750.306,24000,2076.5,13800,-1990,186.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,24,750.306,24000,2076.5,13800,-1990,186.96


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 25000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 125
    nb_customers_dropped_mean: 31.74
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-51-52
  done: false
  episode_len_mean: 187.01
  episode_media: {}
  episode_reward_max: 12520.0
  episode_reward_mean: 1860.9
  episode_reward_min: -1990.0
  episodes_this_iter: 5
  episodes_total: 131
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 24688
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.5379033088684082
          max_q: 139.6329803466797
          mean_q: 53.73530197143555
          min_q: -3.9887452125549316
        mean_td_error: -19.864727020263672
        model: {}
        td_error:
        - -2.6937384605407715
        - -

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,25,763.262,25000,1860.9,12520,-1990,187.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,25,763.262,25000,1860.9,12520,-1990,187.01


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 26000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 125
    nb_customers_dropped_mean: 31.15
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-52-05
  done: false
  episode_len_mean: 187.7
  episode_media: {}
  episode_reward_max: 12520.0
  episode_reward_mean: 1787.1
  episode_reward_min: -1990.0
  episodes_this_iter: 5
  episodes_total: 136
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 25696
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.8977457880973816
          max_q: 131.1811065673828
          mean_q: 52.41145706176758
          min_q: -9.275262832641602
        mean_td_error: -43.27511978149414
        model: {}
        td_error:
        - 48.608482360839844
        - -195.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,26,776.209,26000,1787.1,12520,-1990,187.7


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,26,776.209,26000,1787.1,12520,-1990,187.7


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 27000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 125
    nb_customers_dropped_mean: 31.99
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-52-18
  done: false
  episode_len_mean: 187.1
  episode_media: {}
  episode_reward_max: 12520.0
  episode_reward_mean: 1889.3
  episode_reward_min: -1990.0
  episodes_this_iter: 5
  episodes_total: 141
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 26704
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.45242393016815186
          max_q: 126.8144302368164
          mean_q: 50.50609588623047
          min_q: -2.5489349365234375
        mean_td_error: -34.286277770996094
        model: {}
        td_error:
        - -164.08575439453125
        - -

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,27,789.14,27000,1889.3,12520,-1990,187.1


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,27,789.14,27000,1889.3,12520,-1990,187.1


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,27,789.14,27000,1889.3,12520,-1990,187.1


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 28000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 34.39
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-52-33
  done: false
  episode_len_mean: 186.59
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2169.4
  episode_reward_min: -1990.0
  episodes_this_iter: 6
  episodes_total: 147
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 27712
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.5954862833023071
          max_q: 243.318359375
          mean_q: 72.90321350097656
          min_q: 29.638437271118164
        mean_td_error: -24.504714965820312
        model: {}
        td_error:
        - -200.04104614257812
        - 12.290

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,28,803.608,28000,2169.4,18960,-1990,186.59


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,28,803.608,28000,2169.4,18960,-1990,186.59


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,28,803.608,28000,2169.4,18960,-1990,186.59


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 29000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 34.37
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-52-48
  done: false
  episode_len_mean: 186.59
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2166.0
  episode_reward_min: -1990.0
  episodes_this_iter: 6
  episodes_total: 153
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 28720
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 1.0649268627166748
          max_q: 221.58131408691406
          mean_q: 89.26041412353516
          min_q: -1.715686559677124
        mean_td_error: -10.513740539550781
        model: {}
        td_error:
        - -160.68186950683594
        - -

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,29,818.066,29000,2166,18960,-1990,186.59


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,29,818.066,29000,2166,18960,-1990,186.59


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,29,818.066,29000,2166,18960,-1990,186.59


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 30000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 35.93
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-53-01
  done: false
  episode_len_mean: 186.09
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2350.3
  episode_reward_min: -1990.0
  episodes_this_iter: 5
  episodes_total: 158
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 29728
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.9627465009689331
          max_q: 166.4577178955078
          mean_q: 70.85044860839844
          min_q: 37.54254913330078
        mean_td_error: -21.961475372314453
        model: {}
        td_error:
        - -31.83312225341797
        - -5.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,30,831.382,30000,2350.3,18960,-1990,186.09


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,30,831.382,30000,2350.3,18960,-1990,186.09


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,30,831.382,30000,2350.3,18960,-1990,186.09


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 31000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 34.28
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-56-07
  done: false
  episode_len_mean: 186.09
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2160.5
  episode_reward_min: -1990.0
  episodes_this_iter: 5
  episodes_total: 163
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 30736
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.9956935048103333
          max_q: 122.12454986572266
          mean_q: 54.656982421875
          min_q: 7.23416805267334
        mean_td_error: -14.383983612060547
        model: {}
        td_error:
        - -149.721923828125
        - -1.5115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,31,1017.56,31000,2160.5,18960,-1990,186.09


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,31,1017.56,31000,2160.5,18960,-1990,186.09


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,31,1017.56,31000,2160.5,18960,-1990,186.09


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 32000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 34.47
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-56-20
  done: false
  episode_len_mean: 185.09
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2194.1
  episode_reward_min: -1990.0
  episodes_this_iter: 5
  episodes_total: 168
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 31744
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.4707412123680115
          max_q: 214.57504272460938
          mean_q: 86.36329650878906
          min_q: 28.268726348876953
        mean_td_error: -20.164417266845703
        model: {}
        td_error:
        - 169.6354217529297
        - 44.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,32,1030.47,32000,2194.1,18960,-1990,185.09


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,32,1030.47,32000,2194.1,18960,-1990,185.09


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 33000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 36.24
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-56-33
  done: false
  episode_len_mean: 185.09
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2406.5
  episode_reward_min: -1980.0
  episodes_this_iter: 5
  episodes_total: 173
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 32752
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.6700262427330017
          max_q: 294.78399658203125
          mean_q: 85.35401153564453
          min_q: 7.511416912078857
        mean_td_error: -13.434589385986328
        model: {}
        td_error:
        - -64.26376342773438
        - -27

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,33,1043.41,33000,2406.5,18960,-1980,185.09


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,33,1043.41,33000,2406.5,18960,-1980,185.09


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,33,1043.41,33000,2406.5,18960,-1980,185.09


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 34000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 34.6
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-56-48
  done: false
  episode_len_mean: 185.09
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2205.6
  episode_reward_min: -1980.0
  episodes_this_iter: 6
  episodes_total: 179
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 33760
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.6638398766517639
          max_q: 246.30313110351562
          mean_q: 74.47462463378906
          min_q: 13.519397735595703
        mean_td_error: -31.276891708374023
        model: {}
        td_error:
        - -161.00750732421875
        - 43

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,34,1057.97,34000,2205.6,18960,-1980,185.09


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,34,1057.97,34000,2205.6,18960,-1980,185.09


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,34,1057.97,34000,2205.6,18960,-1980,185.09


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 35000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 36.19
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-57-02
  done: false
  episode_len_mean: 184.03
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2398.2
  episode_reward_min: -1980.0
  episodes_this_iter: 6
  episodes_total: 185
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 34768
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 1.0457175970077515
          max_q: 366.8643798828125
          mean_q: 112.99520874023438
          min_q: 45.505577087402344
        mean_td_error: -12.137262344360352
        model: {}
        td_error:
        - -17.461776733398438
        - -

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,35,1072.42,35000,2398.2,18960,-1980,184.03


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,35,1072.42,35000,2398.2,18960,-1980,184.03


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 36000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 37.51
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-57-15
  done: false
  episode_len_mean: 183.03
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2560.0
  episode_reward_min: -1980.0
  episodes_this_iter: 5
  episodes_total: 190
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 35776
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.8545992970466614
          max_q: 153.65585327148438
          mean_q: 81.91900634765625
          min_q: 11.780555725097656
        mean_td_error: -12.433076858520508
        model: {}
        td_error:
        - 62.713714599609375
        - 40

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,36,1085.33,36000,2560,18960,-1980,183.03


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,36,1085.33,36000,2560,18960,-1980,183.03


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,36,1085.33,36000,2560,18960,-1980,183.03


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 37000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 36.53
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-57-30
  done: false
  episode_len_mean: 184.01
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2437.5
  episode_reward_min: -2000.0
  episodes_this_iter: 6
  episodes_total: 196
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 36784
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.9420771598815918
          max_q: 230.1725311279297
          mean_q: 66.81520080566406
          min_q: 22.738718032836914
        mean_td_error: -32.60700988769531
        model: {}
        td_error:
        - 97.08783721923828
        - 38.36

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,37,1099.99,37000,2437.5,18960,-2000,184.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,37,1099.99,37000,2437.5,18960,-2000,184.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,37,1099.99,37000,2437.5,18960,-2000,184.01


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 38000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 39.07
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-57-44
  done: false
  episode_len_mean: 181.5
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2760.5
  episode_reward_min: -2000.0
  episodes_this_iter: 6
  episodes_total: 202
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 37792
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.5473589897155762
          max_q: 127.06893920898438
          mean_q: 59.314857482910156
          min_q: 18.676048278808594
        mean_td_error: -16.270626068115234
        model: {}
        td_error:
        - 0.4595527648925781
        - -2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,38,1114.38,38000,2760.5,18960,-2000,181.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,38,1114.38,38000,2760.5,18960,-2000,181.5


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 39000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 37.85
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-57-57
  done: false
  episode_len_mean: 181.5
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2615.0
  episode_reward_min: -2000.0
  episodes_this_iter: 5
  episodes_total: 207
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 38800
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.8860577940940857
          max_q: 226.54945373535156
          mean_q: 74.90260314941406
          min_q: 8.688291549682617
        mean_td_error: -26.85384750366211
        model: {}
        td_error:
        - -10.554542541503906
        - -21.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,39,1127.26,39000,2615,18960,-2000,181.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,39,1127.26,39000,2615,18960,-2000,181.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,39,1127.26,39000,2615,18960,-2000,181.5


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 40000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 36.49
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-58-13
  done: false
  episode_len_mean: 181.18
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2449.4
  episode_reward_min: -2000.0
  episodes_this_iter: 7
  episodes_total: 214
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 39808
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.4881654977798462
          max_q: 247.72970581054688
          mean_q: 75.24235534667969
          min_q: 17.898033142089844
        mean_td_error: -9.309488296508789
        model: {}
        td_error:
        - -14.77850341796875
        - 2.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,40,1143.2,40000,2449.4,18960,-2000,181.18


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,40,1143.2,40000,2449.4,18960,-2000,181.18


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,40,1143.2,40000,2449.4,18960,-2000,181.18


Result for DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000:
  agent_timesteps_total: 41000
  callback_ok: true
  custom_metrics:
    nb_customers_dropped_max: 182
    nb_customers_dropped_mean: 36.75
    nb_customers_dropped_min: 0
  date: 2022-04-07_11-58-28
  done: false
  episode_len_mean: 180.47
  episode_media: {}
  episode_reward_max: 18960.0
  episode_reward_mean: 2490.8
  episode_reward_min: -2000.0
  episodes_this_iter: 6
  episodes_total: 220
  experiment_id: 830c55294ce54476ae25520abe6a38c6
  hostname: Wyames-MacBook-Pro.local
  info:
    last_target_update_ts: 40816
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_lr: 0.0005
          grad_gnorm: 0.7390322089195251
          max_q: 239.79965209960938
          mean_q: 66.03931427001953
          min_q: 14.457752227783203
        mean_td_error: -37.394248962402344
        model: {}
        td_error:
        - -39.163429260253906
        - -

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_1_TaxiAgent_2_passangers_Manhattan_fc40e_00000,RUNNING,127.0.0.1:61530,41,1158.04,41000,2490.8,18960,-2000,180.47


In [None]:
# list of lists: one list per checkpoint; each checkpoint list contains
# 1st the path, 2nd the metric value
# checkpoints = analysis.get_trial_checkpoints_paths(
#     trial=analysis.get_best_trial("episode_reward_mean", mode='max'),
#     metric="episode_reward_mean")
checkpoints = analysis.get_last_checkpoint()

agent = dqn.DQNTrainer(
    config={'env_config': env_config, 'framework': 'torch'}, env=env_name)
agent.restore(checkpoints)

## test algo

In [43]:
env = env_multi_creator(env_config)
to_print=True
num_episodes=10

env.reset()
total_reward = []
# box_flag = isinstance(env.observation_space,gym.spaces.Box)
for episode in range(num_episodes):
    env.reset()
    state = env.reset()
    state_0 = state.copy()
    done = False
    ep_reward = 0
    step = 0
    if to_print:
        print("****************************************************")
        print("EPISODE ", episode)

    while not done:
        action = agent.compute_action(state)

        # action = env.action_space.sample()

#
        new_state, reward, done, info = env.step(action)
        # print(new_state)
        ep_reward +=reward
        step+=1

        if done:
            total_reward.append(ep_reward)
            if to_print:
                # Here, we decide to only print the last state (to see if our agent is on the goal or fall into an hole)
                # num_transported= list(state_0[1:]).count(1) - list(new_state[1:]).count(1)
                print('Initial state : ', state_0, ' Final state : ',new_state)

                # We print the number of step it took.
                print(f"Number of steps {step}, customers transported are {env.customers}, the total reward is {ep_reward}")

            break
        state = new_state
        # state = tuple(new_state) if box_flag else new_state

if to_print:
    print('Mean score: %.3f of %i games!'%(np.mean(total_reward), num_episodes))
# return np.mean(total_reward)

****************************************************
EPISODE  0
Initial state :  [[12. -1. -1.]]  Final state :  [[12. 11. 11.]]
Number of steps 200, customers transported are 10, the total reward is -830.0
****************************************************
EPISODE  1
Initial state :  [[53. -1. -1.]]  Final state :  [[53. 13. 13.]]
Number of steps 200, customers transported are 6, the total reward is -1290.0
****************************************************
EPISODE  2
Initial state :  [[31. -1. -1.]]  Final state :  [[31. 59. 59.]]
Number of steps 200, customers transported are 4, the total reward is -1520.0
****************************************************
EPISODE  3
Initial state :  [[17. -1. -1.]]  Final state :  [[17. 51. 51.]]
Number of steps 200, customers transported are 10, the total reward is -830.0
****************************************************
EPISODE  4
Initial state :  [[56. -1. -1.]]  Final state :  [[56. 57. 57.]]
Number of steps 200, customers transported 

## train RLlib

### train ppo

In [None]:
# ray.shutdown()
# ray.init()

# register_env("Multi_passanger_env", env_multi_creator)
# env_choice = "Multi_passanger_env"
# training_iter = 10
# # DEfine training parameters
# trainer_config = ppo.DEFAULT_CONFIG.copy()
# trainer_config['env_config']=env_config ##input to the envi
# # trainer_config['framework']='torch'
# trainer_config["clip_rewards"]=300
# trainer_config["seed"]=1042
# trainer_config['lr'] = 0.0001
# trainer_config['callbacks'] = MyCallbacks
# trainer_config['vf_clip_param']=50
# trainer = ppo.PPOTrainer(trainer_config, env=env_choice)
# ep_rew =[]
# nb_cust = []
# for i in range(training_iter):
#    # Perform one iteration of training the policy with PPO
#     result = trainer.train()
#     ep_rew.append(result['episode_reward_mean'])
#     nb_cust.append(result['custom_metrics']['nb_customers_dropped_mean'])
#     if (i+1) % 10 == 0:
#         checkpoint = trainer.save()
#         print("checkpoint saved at", checkpoint)
        
# dict_results[str(trainer)]={'episode_reward_mean':ep_rew,
#                             'number_customers':nb_cust}

### train dqn

In [None]:
# ray.shutdown()
# ray.init()

# register_env("Multi_passanger_env", env_multi_creator)
# env_choice = "Multi_passanger_env"
# # training_iter = 50
# # # DEfine training parameters
# trainer_config = {}
# trainer_config['env_config']=env_config ##input to the envi
# # trainer_config['framework']='torch'
# trainer_config["clip_rewards"]=300
# # trainer_config['lr'] = 0.0001
# trainer_config['callbacks'] = MyCallbacks
# trainer = dqn.DQNTrainer(trainer_config, env=env_choice)
# ep_rew =[]
# nb_cust = []
# for i in range(training_iter):
#    # Perform one iteration of training the policy with PPO
#     result = trainer.train()
#     ep_rew.append(result['episode_reward_mean'])
#     nb_cust.append(result['custom_metrics']['nb_customers_dropped_mean'])
#     if i % 10 == 0:
#         checkpoint = trainer.save()
#         print("checkpoint saved at", checkpoint)
        
# dict_results[str(trainer)]={'episode_reward_mean':ep_rew,
#                             'number_customers':nb_cust}

### train pg

In [None]:
# ray.shutdown()
# ray.init()

# register_env("Multi_passanger_env", env_multi_creator)
# # DEfine training parameters
# trainer_config = pg.DEFAULT_CONFIG.copy()
# trainer_config['env_config']=env_config ##input to the envi
# # trainer_config['framework']='torch'
# trainer_config["clip_rewards"]=300
# # trainer_config['lr'] = 0.0001
# trainer_config['callbacks'] = MyCallbacks
# # Train the model
# trainer = pg.PGTrainer(trainer_config, env=env_choice);
# ep_rew =[]
# nb_cust = []
# for i in range(training_iter):
#    # Perform one iteration of training the policy with PPO
#     result = trainer.train()
#     if i>0:
#         ep_rew.append(result['episode_reward_mean'])
#         nb_cust.append(result['custom_metrics']['nb_customers_dropped_mean'])
#     if i % 10 == 0:
#         checkpoint = trainer.save()
#         print("checkpoint saved at", checkpoint)
        
# dict_results[str(trainer)]={'episode_reward_mean':ep_rew,
#                             'number_customers':nb_cust}

### train A3C

In [None]:
# ray.shutdown()
# ray.init()

# register_env("Multi_passanger_env", env_multi_creator)
# # DEfine training parameters
# trainer_config = a3c.DEFAULT_CONFIG.copy()
# trainer_config['env_config']=env_config ##input to the envi
# # trainer_config['framework']='torch'
# trainer_config['callbacks'] = MyCallbacks
# # trainer_config['lr'] = 0.0001
# # Train the model
# trainer = a3c.A3CTrainer(trainer_config, env=env_choice);
# ep_rew =[]
# nb_cust = []
# for i in range(training_iter):
#    # Perform one iteration of training the policy with PPO
#     result = trainer.train()
#     if i>0:
#         ep_rew.append(result['episode_reward_mean'])
#         nb_cust.append(result['custom_metrics']['nb_customers_dropped_mean'])
#     if i % 10 == 0:
#         checkpoint = trainer.save()
#         print("checkpoint saved at", checkpoint)
        
# dict_results[str(trainer)]={'episode_reward_mean':ep_rew,
#                             'number_customers':nb_cust}