In [32]:
import gym
import pandas as pd
import os, sys
import _pickle as cPickle

if 'SUMO_HOME' in os.environ:
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    sys.path.append(tools)
else:
    sys.exit("please declare environment variable 'SUMO_HOME'")

import libsumo as traci

class CitySimulation(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        # TODO: determine observation and action spaces for agent
        self.state = []
        self.done = 0
        self.reward = 0
        self.sumoCmd = ["/usr/bin/sumo/bin/sumo", "-c", "../sumo_simulation/sim_config/osm.sumocfg"]
        self.steps_in_hour = 20
        self.iteration_counter = 0 #Count simulation block steps
        self.zone_lane_mapper = cPickle.load(open('../sumo_simulation/input/action_to_zone.pkl', 'rb'))
        self.fis = cPickle.load(open('../sumo_simulation/input/fis.pkl', 'rb'))
        self.action_space = gym.spaces.MultiDiscrete([2 for i in range(len(self.zone_lane_mapper.keys()))])
        self.observation_space = gym.spaces.Box(low=0, high=np.infty, shape=(6, len(self.zone_lane_mapper.keys())))

    def step(self, action):
        '''
        Action represents logits from Agent. Probably MultiDescrete input.
        Must check if tf-agents outputs 1s and 0s or if a step function is needed.
        '''
        #Actions need to alter LANE_DISSALLOWED, within zones (need to compute a priori)
        #Assuming np.array input
        #shape = (n_lanes+1) * 24

        self.assignAllowedVehicles(action)
        reward_means = self.runSimulationSteps()
        self.iteration_counter += 1
        reward = self.get_reward(reward_means)

        if reward > self.reward:
            self.reward += reward
        else:
            self.reward -= reward

        return [self.state, self.reward, self.done, {}]#Fuzzy output for reward

    def reset(self):
        '''
        Generate simulation connection with libsumo.
        Creates subscriptions to emission and fuel consumption per lane for performance
        '''
        traci.start(self.sumoCmd)

        for edge_id in traci.lane.getIDList():
            traci.lane.subscribe(edge_id, [traci.constants.VAR_CO2EMISSION,
                                           traci.constants.VAR_COEMISSION,
                                           traci.constants.VAR_PMXEMISSION,
                                           traci.constants.VAR_NOXEMISSION,
                                           traci.constants.VAR_NOISEEMISSION,
                                           traci.constants.VAR_FUELCONSUMPTION])

        self.state = []
        self.done = 0
        self.reward = 0
        self.iteration_counter = 0

    def render(self):
        return self.state

    #Helper functions
    def assignAllowedVehicles(self, action):
        '''
        Recieve an action array and alter allowed vehicles per lane selected
        Must recieve 1s and 0s in array
        '''
        lane_indices = np.where(action == 1)[0]
        for lane_id in lane_indices:
            try:
                traci.lane.setDissallowed(lane_id, 'truck')
            except:
                pass

        return 1

    def runSimulationSteps(self):
        '''
        This function runs a block of SUMO simulations and returns the emission and
        fuel consumption state. The state the agent percieves will be
        '''

        for _ in range(self.steps_in_hour):
            #Check if all vehicles have left the simulation
            if traci.simulation.getMinExpectedNumber() == 0:
                self.done = 1
                sim_results = pd.DataFrame.from_dict(traci.lane.getAllSubscriptionResults())
                self.state = sim_results.values
                traci.close()

                #Returns state means for reward calculation
                return sim_results.T.mean().values

            else:
                traci.simulationStep()

        sim_results = pd.DataFrame.from_dict(traci.lane.getAllSubscriptionResults())
        self.state = sim_results.values

        if self.iteration_counter == 3:
            self.done = 1
            traci.close()

        return sim_results.T.mean()

    def get_reward(self, means):
        self.fis.input['co2'] = means[96]
        self.fis.input['co'] = means[97]
        self.fis.input['pmx'] = means[99]
        self.fis.input['nox'] = means[100]
        self.fis.input['noise'] = means[101]
        self.fis.input['fuel'] = means[102]
        self.fis.compute()
        return self.fis.output['output']


In [33]:
cs = CitySimulation()

In [34]:
cs.reset()

In [35]:
test = cs.action_space.sample()

In [37]:
cs.step(test)

[array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]), 164.2847288621244, 0, {}]

In [38]:
cs.assignAllowedVehicles(test)

1