In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import gym
from gym.spaces import Discrete, Box
from gym.envs.registration import EnvSpec

import ray
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.tune import run_experiments
from ray.tune.registry import register_env

lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.


In [2]:
class Routing_Env(gym.Env):
    """
    Description: Keeps track of a single car's own environment.

    The state is ``[previous path choice, previous path cost]``. Both entries
    are -1 until the first action has been applied. When the ``comm`` option
    is enabled the observation space has one extra entry, and the state is
    padded with 0 so that its length matches the declared space.

    Config keys:
        num_paths: number of discrete routing choices (required).
        soc_fac:   weight toward social good in the cost; defaults to 0.0
                   (purely selfish cost) when not supplied.
        comm:      truthy to reserve one extra observation entry for the comm
                   channel; defaults to False when not supplied.
    """
    def __init__(self, config):
        self.num_actions = config['num_paths']
        # A missing weight falls back to the selfish cost instead of raising
        # KeyError when the experiment config omits it.
        self.soc_fac = config.get('soc_fac', 0.0)
        self.num_obs = 2
        # Adding input for the comm channel
        if config.get('comm', False):
            self.num_obs += 1
        self.viewer = None
        self.state = None

    def _build_state(self, path_choice, path_cost):
        """Return the state list, padded so its length equals ``num_obs``."""
        state = [path_choice, path_cost]
        # NOTE(review): 0 is only a placeholder for the comm-channel entry;
        # replace it once the comm channel carries real data.
        state.extend([0] * (self.num_obs - len(state)))
        return state

    def reset(self):
        """Reset to the initial state (-1 path, -1 cost) and return it."""
        self.state = self._build_state(-1, -1)
        return self.state

    def step_after_action_applied(self, path_choice, path_cost=0, marginal_cost=0):
        """
        Record the chosen path and compute the reward for it.

        Note: Before you calculate the travel time, you need to update your network of the car's action choices first.
              Otherwise, you won't be able to calculate the accurate travel times for each car.

        Args:
            path_choice: the path selected by this car.
            path_cost: cost of the selected path.
            marginal_cost: marginal cost of this car's choice.

        Returns:
            (state, reward, done, info) where
            reward = -((1 - soc_fac) * path_cost + soc_fac * marginal_cost),
            done is always True and info is an empty dict.
        """
        self.state = self._build_state(path_choice, path_cost)
        cost = (1 - self.soc_fac) * path_cost + self.soc_fac * marginal_cost
        rew = -cost
        done = True  # We are always done after 1 step of the environment
        return self.state, rew, done, {}

    @property
    def observation_space(self):
        """Box of ``num_obs`` entries; the lower bound is -1 so the initial
        (-1, -1) observation returned by ``reset`` lies inside the space."""
        return Box(
            low=-1.0,
            high=float('+inf'),
            shape=(self.num_obs,),
            dtype=np.float32
        )

    @property
    def action_space(self):
        """Discrete space with one action per available path."""
        return Discrete(self.num_actions)

    def get_state(self):
        """Return the most recently stored state (None before ``reset``)."""
        return self.state
    

In [3]:
class MultiAgentRouting(MultiAgentEnv):
    """
    Description:
        Cars all start at the same origin point, Point A, and need to reach the same destination, Point Z.
        Each car can reach Point Z via a variety of routing choices described in a given network, in which
        each route introduces different travel times and congestion.
        The goal is to minimize the average travel times amongst each of the cars.

    Observation:
        Type: Box(2) - for partially observed env
        Num   Observation                 Min          Max
        0     Previous Route Choice        0      total_routes-1
        1     Route Travel Time            0           +Inf
        (Both entries are -1 before the first step; see Starting State.)

    Actions:
        Type: Discrete(num_of_paths)
        Num   Action
        0     Proceed via Path 1
        1     Proceed via Path 2
        ...
        n     Proceed via Path n

    Reward:
        Reward for each car is determined by the following formula:
        marginal_cost = d[x_e*t(x_e)]/d[x_e]
        Cost = (1-λ)route_travel_time + λ(marginal_cost)
        Reward = -Cost
        ***
        route_travel_time: Travel time of the route previously taken by the car
        marginal_cost: Cost that the car's route choice imposes on everyone else.
                       The formula above captures the change in the travel flow
                       with respect to the change in vehicle flow on a given road.
        λ: Weight Toward Social Good

    Starting State:
        All observations are assigned -1 for path choice and travel times.

    Episode Termination:
        Cars keep a consistent routing distribution.
        Episode length is greater than 200
        Solved Requirements
        Considered solved when the average travel time is less than or equal to the theoretical social optimum.
        NOTE(review): the current code reports "__all__" done as soon as every
        vehicle env has reported done; the criteria above are not implemented here.
    """
    def __init__(self, config):
        """Create one ``Routing_Env`` per vehicle from the shared config.

        Reads ``config['network']`` and ``config['num_vehicles']``; the whole
        config is forwarded unchanged to each per-vehicle env.

        Raises:
            ValueError: if ``num_vehicles`` is less than 1.
        """
        self.network_name = config['network']
        self.num_veh = config['num_vehicles']
        if self.num_veh < 1:
            raise ValueError("num_vehicles must be at least 1, got %r" % (self.num_veh,))
        # Instantiate the env class directly: gym.make() expects a registered
        # id string, not a class, and would not forward the config.
        self.agents = [Routing_Env(config) for _ in range(self.num_veh)]
        self.dones = set()
        self.state = None
        self.observation_space = self.agents[0].observation_space
        self.action_space = self.agents[0].action_space

    def reset(self):
        """Reset every vehicle env and return ``{agent_id: observation}``."""
        # Forget which agents finished in the previous episode; otherwise
        # "__all__" would be reported from stale entries.
        self.dones = set()
        self.state = {i: agent.reset() for i, agent in enumerate(self.agents)}
        return self.state

    def step(self, action_dict):
        """Apply every agent's action, then step each agent's env.

        Args:
            action_dict: mapping of agent index -> chosen path.

        Returns:
            (obs, rew, done, info) dicts keyed by agent index; ``done`` also
            carries the "__all__" key.
        """
        obs, rew, done, info, costs = {}, {}, {}, {}, {}
        # Apply the actions to the network
        for i, action in action_dict.items():
            assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
            # CHANGE THE FOLLOWING!!
            # Placeholder costs: the path cost is the action index itself and
            # the marginal cost is zero until a network model is plugged in.
            costs[i] = {'path_cost': action, 'marginal_cost': 0}

        # Step the environment
        for i, action in action_dict.items():
            obs[i], rew[i], done[i], info[i] = self.agents[i].step_after_action_applied(
                action,
                path_cost=costs[i]['path_cost'],
                marginal_cost=costs[i]['marginal_cost'])
            if done[i]:
                self.dones.add(i)
        done["__all__"] = len(self.dones) == len(self.agents)
        return obs, rew, done, info

In [None]:
if __name__ == "__main__":
    # Register the multi-agent routing env under a name Tune can look up.
    env_creator_name = 'multi_routing'
    register_env(env_creator_name, lambda config: MultiAgentRouting(config))
    ray.init()
    experiments = {
        'route-DQN': {
            'run': 'DQN',
            'env': env_creator_name,
            'stop': {
                'training_iteration': 10
            },
            'config': {
                'env_config': {
                    'network': 'Braess',
                    'num_vehicles': 1,
                    'num_paths': 2,
                    # Routing_Env reads config['soc_fac']; omitting it makes
                    # env construction fail with KeyError. 0.0 weights the
                    # cost entirely toward the car's own path cost.
                    'soc_fac': 0.0,
                    'comm': False
                },
            }
        },
        # put additional experiments to run concurrently here
    }

    run_experiments(experiments)

In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import gym
from gym.spaces import Discrete, Box
from gym.envs.registration import EnvSpec

import ray
from ray.tune import run_experiments
from ray.tune.registry import register_env


class SimpleCorridor(gym.Env):
    """Custom env example: walk from position 0 to the end of a corridor.

    The corridor length is read from ``config["corridor_length"]``. Action 1
    moves one step forward; action 0 moves one step back unless already at
    position 0. The episode ends, with reward 1, once the end is reached.
    """

    def __init__(self, config):
        corridor_length = config["corridor_length"]
        self.end_pos = corridor_length
        self.cur_pos = 0
        self.action_space = Discrete(2)
        self.observation_space = Box(
            0.0, self.end_pos, shape=(1, ), dtype=np.float32)
        spec_id = "SimpleCorridor-{}-v0".format(self.end_pos)
        self._spec = EnvSpec(spec_id)

    def reset(self):
        """Move back to the start and return the one-element observation."""
        self.cur_pos = 0
        return [self.cur_pos]

    def step(self, action):
        """Apply ``action`` and return (observation, reward, done, info)."""
        assert action in [0, 1], action
        if action == 1:
            self.cur_pos += 1
        elif action == 0 and self.cur_pos > 0:
            self.cur_pos -= 1
        reached_end = self.cur_pos >= self.end_pos
        reward = 1 if reached_end else 0
        return [self.cur_pos], reward, reached_end, {}

if __name__ == "__main__":
    # Register the corridor env under a name Tune can look up, then start Ray.
    env_creator_name = "corridor"
    register_env(env_creator_name, lambda config: SimpleCorridor(config))
    ray.init()

    # Single DQN trial on a corridor of length 5.
    run_experiments({
        "demo": {
            "run": "DQN",
            "env": env_creator_name,
            "config": {"env_config": {"corridor_length": 5}},
        },
    })

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2018-11-26_14-10-25_44054/logs.
Waiting for redis server at 127.0.0.1:45123 to respond...
Waiting for redis server at 127.0.0.1:55233 to respond...
Starting the Plasma object store with 6.871947672999999 GB memory using /tmp.

View the web UI at http://localhost:8892/notebooks/ray_ui.ipynb?token=f2ae5821200f09cc33e954ded617b94e55cac889f13c1861

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs
Memory usage on this node: 12.8/17.2 GB

Created LogSyncer for /Users/mtgibson/ray_results/demo/DQN_corridor_0_2018-11-26_14-10-26e5fckyv0 -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 1/8 CPUs, 0/0 GPUs
Memory usage on this node: 12.8/17.2 GB
Result logdir: /Users/mtgibson/ray_results/demo
RUNNING trials:
 - DQN_corridor_0:	RUNNING

Remote function __init__ failed with:

Traceback (most recent call last):
  File "/Users/mtgibson/ray/python/ray/worker.py", line 

Worker ip unknown, skipping log sync for /Users/mtgibson/ray_results/demo/DQN_corridor_0_2018-11-26_14-10-26e5fckyv0
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs
Memory usage on this node: 11.9/17.2 GB
Result logdir: /Users/mtgibson/ray_results/demo
ERROR trials:
 - DQN_corridor_0:	ERROR, 1 failures: /Users/mtgibson/ray_results/demo/DQN_corridor_0_2018-11-26_14-10-26e5fckyv0/error_2018-11-26_14-10-34.txt

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs
Memory usage on this node: 11.9/17.2 GB
Result logdir: /Users/mtgibson/ray_results/demo
ERROR trials:
 - DQN_corridor_0:	ERROR, 1 failures: /Users/mtgibson/ray_results/demo/DQN_corridor_0_2018-11-26_14-10-26e5fckyv0/error_2018-11-26_14-10-34.txt



TuneError: ('Trials did not complete', [DQN_corridor_0])