In [1]:
import numpy as np
import math
import random

In [15]:
class TrafficGenerator:
    """Write the SUMO route file (``map.rou.xml``) describing one episode of traffic.

    Departure times are drawn from a Weibull distribution (shape 2) and
    rescaled to the interval ``0..max_steps``. Each car is then assigned a
    random route: 75% of the cars go straight through the intersection and
    25% turn.
    """

    # Routes of cars going straight, indexed by ``np.random.randint(1, 5) - 1``.
    _STRAIGHT_ROUTES = ("D_F", "F_D", "AF_G", "G_AF")
    # Routes of turning cars, indexed by ``np.random.randint(1, 9) - 1``.
    # Index 0 ("D_AF") additionally has the rarely used alternative "D_AF_2".
    _TURN_ROUTES = ("D_AF", "D_G", "F_AF", "F_G", "G_D", "G_F", "AF_F", "AF_D")
    # One <vehicle> line: (route id, car counter, route id, departure step).
    _VEHICLE_TEMPLATE = '    <vehicle id="%s_%i" type="standard_car" route="%s" depart="%s" departLane="random" departSpeed="30" />'

    def __init__(self, max_steps, n_cars_generated):
        """
        Args:
            max_steps: upper bound of the interval the departure steps are mapped onto.
            n_cars_generated: how many cars are generated per episode.
        """
        self._n_cars_generated = n_cars_generated  # how many cars per episode
        self._max_steps = max_steps

    def generate_routefile(self, seed):
        """
        Generate ``map.rou.xml`` in the current working directory.

        Args:
            seed: value passed to ``np.random.seed`` so that the same seed
                always produces the same file.
        """
        np.random.seed(seed)  # make tests reproducible
        # the generation of cars is distributed according to a weibull distribution
        timings = np.sort(np.random.weibull(2, self._n_cars_generated))
        car_gen_steps = self._rescale_to_steps(timings)

        # produce the file for cars generation, one car per line
        with open("map.rou.xml", "w") as routes:
            print("""<routes>
            <vType accel="1.0" decel="4.5" id="standard_car" length="5.0" minGap="2.5" maxSpeed="40" sigma="0.5" />

            <route id="D_F" edges="D2I D2 I2F"/>#F=FSSM/AF=Allal lfasi/D=bab dkala/G=gueliz
            <route id="D_AF_2" edges="D2I D2 2AF I2AF"/>
            <route id="D_AF" edges="D2I D2AF_2 I2AF"/>
            <route id="D_G" edges="D2I D2 I2G"/>
            <route id="F_D" edges="F2I I2D"/>
            <route id="F_AF" edges="F2I 2AF I2AF"/>
            <route id="F_G" edges="F2I I2G"/>
            <route id="AF_F" edges="AF2I I2F"/>
            <route id="AF_G" edges="AF2I I2G"/>
            <route id="AF_D" edges="AF2I I2D"/>
            <route id="G_D" edges="G2I I2D"/>
            <route id="G_AF" edges="G2I 2AF I2AF"/>
            <route id="G_F" edges="G2I I2F"/>""", file=routes)

            for car_counter, step in enumerate(car_gen_steps):
                route = self._pick_route()
                print(self._VEHICLE_TEMPLATE % (route, car_counter, route, step), file=routes)

            print("</routes>", file=routes)

    def _rescale_to_steps(self, timings):
        """Map the sorted Weibull samples onto 0..max_steps and round them to whole steps."""
        if timings.size == 0:  # no cars requested: nothing to rescale
            return timings

        # the old range is delimited by the smallest and the largest sample
        min_old = math.floor(timings[0])
        max_old = math.ceil(timings[-1])
        min_new = 0
        max_new = self._max_steps

        span_old = max_old - min_old
        if span_old == 0:  # every sample is the same exact integer; avoid dividing by zero
            span_old = 1

        # vectorised form of the per-sample linear mapping
        scaled = ((max_new - min_new) / span_old) * (timings - max_old) + max_new
        return np.rint(scaled)  # round every value -> effective steps when a car will be generated

    def _pick_route(self):
        """Draw the route id of the next car; the order of RNG calls is part of the seeded output."""
        if np.random.uniform() < 0.75:  # 75% of the time the car goes straight
            return self._STRAIGHT_ROUTES[np.random.randint(1, 5) - 1]

        route_turn = np.random.randint(1, 9)  # 25% of the time the car turns
        if route_turn == 1:
            # D -> AF has two alternatives; the "_2" variant is used 10% of the time
            return "D_AF_2" if np.random.uniform() > 0.9 else "D_AF"
        return self._TURN_ROUTES[route_turn - 1]

In [16]:
# Smoke test: write a route file for 2000 cars spread over 5400 steps.
# Note that this overwrites map.rou.xml in the current working directory.
temp = TrafficGenerator(max_steps=5400, n_cars_generated=2000)
temp.generate_routefile(seed=42)

In [17]:
class Memory:
    """Bounded first-in-first-out store of samples with uniform random retrieval."""

    def __init__(self, size_max, size_min):
        """
        Args:
            size_max: maximum number of samples kept; older ones are evicted first.
            size_min: minimum number of stored samples before any can be retrieved.
        """
        self._size_max = size_max
        self._size_min = size_min
        self._samples = []

    def add_sample(self, sample):
        """
        Store one sample, evicting the oldest one if the capacity is exceeded
        """
        stored = self._samples
        stored.append(sample)
        if len(stored) > self._size_max:
            del stored[0]  # the front of the list holds the oldest element

    def get_samples(self, n):
        """
        Return up to n distinct samples picked at random, or [] while fewer than size_min are stored
        """
        available = self._size_now()
        if available < self._size_min:
            return []

        # never ask for more samples than the memory currently holds
        return random.sample(self._samples, min(n, available))

    def _size_now(self):
        """
        Number of samples currently stored
        """
        return len(self._samples)

In [18]:
import traci
import numpy as np
import random
import timeit
import os

# Traffic-light phase codes, based on map.net.xml.
# Each yellow phase is its green phase + 1; Simulation._set_yellow_phase relies
# on this when it computes the yellow code as `old_action * 2 + 1`.
PHASE_DandF_GREEN = 0  # action 0 code 00
PHASE_DandF_YELLOW = 1  # yellow phase activated after action 0
PHASE_GandAF_GREEN = 2  # action 1 code 01
PHASE_GandAF_YELLOW = 3  # yellow phase activated after action 1

class Simulation:
    """Run one SUMO episode through TraCI, store the transitions, then train the model.

    For every decision step the intersection state is read as a cell-occupancy
    vector, an action (green phase) is chosen epsilon-greedily, and the reward
    of the previous action is the decrease in accumulated waiting time of the
    cars on the incoming roads.
    """

    # Edges that lead into the intersection; used for waiting times and queue length.
    _INCOMING_ROADS = ("D2I", "D2", "F2I", "AF2I", "G2I")
    # Value the lane position is subtracted from, so that 0 means "at the traffic light".
    _ROAD_LENGTH = 500
    # Exclusive upper bound (distance from the traffic light) of each of the 10 cells.
    _CELL_UPPER_BOUNDS = (7, 14, 21, 28, 40, 60, 100, 160, 300, 500)
    # Lane id -> lane group; lanes not listed here are ignored by the state.
    _LANE_GROUPS = {
        "D2I_0": 0, "D2I_1": 0, "D2_0": 0, "D2_1": 0,
        "F2I_0": 1, "F2I_1": 1,
        "AF2I_0": 2, "AF2I_1": 2,
        "G2I_0": 3, "G2I_1": 3,
    }

    def __init__(self, Model, Memory, TrafficGen, sumo_cmd, gamma, max_steps, green_duration, yellow_duration, num_states, num_actions, training_epochs):
        """
        Args:
            Model: object providing predict_one, predict_batch, train_batch and batch_size.
            Memory: object providing add_sample and get_samples.
            TrafficGen: object providing generate_routefile(seed=...).
            sumo_cmd: command passed to traci.start.
            gamma: discount factor of the Q-learning update.
            max_steps: number of simulation steps per episode.
            green_duration: simulation steps of a green phase.
            yellow_duration: simulation steps of a yellow phase.
            num_states: length of the state vector.
            num_actions: number of selectable actions.
            training_epochs: number of replay/training rounds after each episode.
        """
        self._Model = Model
        self._Memory = Memory
        self._TrafficGen = TrafficGen
        self._gamma = gamma
        self._step = 0
        self._sumo_cmd = sumo_cmd
        self._max_steps = max_steps
        self._green_duration = green_duration
        self._yellow_duration = yellow_duration
        self._num_states = num_states
        self._num_actions = num_actions
        self._reward_store = []
        self._cumulative_wait_store = []
        self._avg_queue_length_store = []
        self._training_epochs = training_epochs


    def run(self, episode, epsilon):
        """
        Simulate one episode, then train on replayed samples

        Args:
            episode: episode number, also used as seed of the route file.
            epsilon: probability of choosing a random action.

        Returns:
            (simulation_time, training_time) in seconds, rounded to one decimal.
        """
        start_time = timeit.default_timer()

        # first, generate the route file for this simulation and set up sumo
        self._TrafficGen.generate_routefile(seed=episode)
        traci.start(self._sumo_cmd)
        print("Simulating...")

        try:
            # inits
            self._step = 0
            self._waiting_times = {}
            self._sum_neg_reward = 0
            self._sum_queue_length = 0
            self._sum_waiting_time = 0
            old_total_wait = 0
            old_state = -1
            old_action = -1

            while self._step < self._max_steps:

                # get current state of the intersection
                current_state = self._get_state()

                # reward of the previous action: change in cumulative waiting time between actions
                current_total_wait = self._collect_waiting_times()
                reward = old_total_wait - current_total_wait

                # saving the data into the memory (there is no previous action at step 0)
                if self._step != 0:
                    self._Memory.add_sample((old_state, old_action, reward, current_state))

                # choose the light phase to activate, based on the current state of the intersection
                action = self._choose_action(current_state, epsilon)

                # if the chosen phase is different from the last phase, activate the yellow phase
                if self._step != 0 and old_action != action:
                    self._set_yellow_phase(old_action)
                    self._simulate(self._yellow_duration)

                # execute the phase selected before
                self._set_green_phase(action)
                self._simulate(self._green_duration)

                # saving variables for later & accumulate reward
                old_state = current_state
                old_action = action
                old_total_wait = current_total_wait

                # saving only the meaningful reward to better see if the agent is behaving correctly
                if reward < 0:
                    self._sum_neg_reward += reward

            self._save_episode_stats()
            print("Total reward:", self._sum_neg_reward, "- Epsilon:", round(epsilon, 2))
        finally:
            # always release the TraCI connection, otherwise the next traci.start fails
            traci.close()
        simulation_time = round(timeit.default_timer() - start_time, 1)

        print("Training...")
        start_time = timeit.default_timer()
        for _ in range(self._training_epochs):
            self._replay()
        training_time = round(timeit.default_timer() - start_time, 1)

        return simulation_time, training_time


    def _simulate(self, steps_todo):
        """
        Execute steps in sumo while gathering statistics
        """
        # do not do more steps than the maximum allowed number of steps
        steps_todo = min(steps_todo, self._max_steps - self._step)

        for _ in range(steps_todo):
            traci.simulationStep()  # simulate 1 step in sumo
            self._step += 1  # update the step counter
            queue_length = self._get_queue_length()
            self._sum_queue_length += queue_length
            # 1 step while waiting in queue means 1 second waited, for each car,
            # therefore queue_length == waited_seconds
            self._sum_waiting_time += queue_length


    def _collect_waiting_times(self):
        """
        Retrieve the summed accumulated waiting time of every car currently on the incoming roads

        The tracking dict is rebuilt on every call, so cars that cleared the
        intersection or left the simulation since the previous call stop
        contributing to the total.
        """
        tracked = {}
        for car_id in traci.vehicle.getIDList():
            # consider only the waiting times of cars in incoming roads
            if traci.vehicle.getRoadID(car_id) in self._INCOMING_ROADS:
                tracked[car_id] = traci.vehicle.getAccumulatedWaitingTime(car_id)
        self._waiting_times = tracked
        return sum(tracked.values())


    def _choose_action(self, state, epsilon):
        """
        Decide whether to perform an explorative or exploitative action, according to an epsilon-greedy policy
        """
        if random.random() < epsilon:
            return random.randint(0, self._num_actions - 1)  # random action
        # the best action given the current state, as a plain int like the random branch
        return int(np.argmax(self._Model.predict_one(state)))


    def _set_yellow_phase(self, old_action):
        """
        Activate the yellow phase that follows the green phase of old_action
        """
        # the yellow code is the green code + 1, and the green code is action * 2
        yellow_phase_code = old_action * 2 + 1
        traci.trafficlight.setPhase("TL", yellow_phase_code)


    def _set_green_phase(self, action_number):
        """
        Activate the green phase of the given action; other action numbers leave the light unchanged
        """
        if action_number == 0:
            traci.trafficlight.setPhase("TL", PHASE_DandF_GREEN)
        elif action_number == 1:
            traci.trafficlight.setPhase("TL", PHASE_GandAF_GREEN)

    def _get_queue_length(self):
        """
        Retrieve the number of halting cars summed over every incoming road
        """
        return sum(traci.edge.getLastStepHaltingNumber(road) for road in self._INCOMING_ROADS)


    def _get_state(self):
        """
        Retrieve the state of the intersection from sumo, in the form of cell occupancy

        Returns an array of num_states zeros where index
        ``lane_group * 10 + lane_cell`` is set to 1 for every car on a known
        incoming lane. Cars on any other lane are ignored.
        """
        state = np.zeros(self._num_states)
        cells_per_group = len(self._CELL_UPPER_BOUNDS)

        for car_id in traci.vehicle.getIDList():
            lane_group = self._LANE_GROUPS.get(traci.vehicle.getLaneID(car_id))
            if lane_group is None:
                continue  # car crossing the intersection or driving away from it

            # inversion of lane pos, so if the car is close to the traffic light -> distance = 0
            distance = self._ROAD_LENGTH - traci.vehicle.getLanePosition(car_id)

            # first cell whose upper bound exceeds the distance; distances at or
            # beyond the last bound (e.g. a car at lane position 0) fall in the last cell
            lane_cell = next(
                (cell for cell, bound in enumerate(self._CELL_UPPER_BOUNDS) if distance < bound),
                cells_per_group - 1,
            )

            state[lane_group * cells_per_group + lane_cell] = 1  # mark the cell as occupied
        return state


    def _replay(self):
        """
        Retrieve a group of samples from the memory and for each of them update the learning equation, then train
        """
        batch = self._Memory.get_samples(self._Model.batch_size)

        if len(batch) > 0:  # if the memory is full enough
            states = np.array([val[0] for val in batch])  # extract states from the batch
            next_states = np.array([val[3] for val in batch])  # extract next states from the batch

            # prediction
            q_s_a = self._Model.predict_batch(states)  # predict Q(state), for every sample
            q_s_a_d = self._Model.predict_batch(next_states)  # predict Q(next_state), for every sample

            # setup training arrays
            x = np.zeros((len(batch), self._num_states))
            y = np.zeros((len(batch), self._num_actions))

            for i, sample in enumerate(batch):
                state, action, reward = sample[0], sample[1], sample[2]  # extract data from one sample
                current_q = q_s_a[i]  # get the Q(state) predicted before
                current_q[action] = reward + self._gamma * np.amax(q_s_a_d[i])  # update Q(state, action)
                x[i] = state
                y[i] = current_q  # Q(state) that includes the updated action value

            self._Model.train_batch(x, y)  # train the NN


    def _save_episode_stats(self):
        """
        Save the stats of the episode to plot the graphs at the end of the session
        """
        self._reward_store.append(self._sum_neg_reward)  # how much negative reward in this episode
        self._cumulative_wait_store.append(self._sum_waiting_time)  # total number of seconds waited by cars in this episode
        self._avg_queue_length_store.append(self._sum_queue_length / self._max_steps)  # average number of queued cars per step, in this episode


    @property
    def reward_store(self):
        """Cumulative negative reward of every finished episode."""
        return self._reward_store


    @property
    def cumulative_wait_store(self):
        """Total waited seconds of every finished episode."""
        return self._cumulative_wait_store


    @property
    def avg_queue_length_store(self):
        """Average queue length per step of every finished episode."""
        return self._avg_queue_length_store



In [19]:

# --- simulation settings ---
gui = False  # presumably selects the SUMO GUI binary when True -- TODO confirm against set_sumo
total_episodes = 100  # intended number of training episodes
max_steps = 5400  # simulation steps per episode (passed to TrafficGenerator and Simulation)
n_cars_generated = 1000  # cars generated per episode
green_duration = 10  # simulation steps of every green phase
yellow_duration = 4  # simulation steps of every yellow phase

# --- model settings (passed to TrainModel, which is not defined in this notebook) ---
num_layers = 4
width_layers = 400
batch_size = 100
learning_rate = 0.001
training_epochs = 800  # number of Simulation._replay rounds executed after each episode


# --- replay memory settings ---
memory_size_min = 600  # Memory.get_samples returns [] until this many samples are stored
memory_size_max = 50000  # the oldest sample is dropped once this many are exceeded


# --- agent settings ---
num_states = 40  # length of the state vector: 4 lane groups x 10 cells in Simulation._get_state
num_actions = 2  # the two green phases handled by Simulation._set_green_phase
gamma = 0.75  # discount factor used in the Q-value update of Simulation._replay
path='./'  # passed to Visualization and Model.save_model

In [None]:
from __future__ import absolute_import
from __future__ import print_function

import os
import datetime
from shutil import copyfile

# from model import TrainModel
# from visualization import Visualization
# from utils import import_train_configuration, set_sumo, set_train_path





# NOTE(review): this cell used to read its settings from a `config` mapping that is
# never defined in the notebook. It now uses the plain variables of the settings
# cell (gui, max_steps, total_episodes). `set_sumo`, `TrainModel` and
# `Visualization` still have to be imported or defined before this cell can run.

# TODO confirm: the SUMO configuration filename is not defined anywhere in this
# notebook; replace this placeholder with the real .sumocfg file name.
sumocfg_file_name = "sumo_config.sumocfg"

sumo_cmd = set_sumo(gui, sumocfg_file_name, max_steps)

# Instances get lowercase names so that the classes Memory, Visualization and
# Simulation are not overwritten and the cell can be executed more than once.
model = TrainModel(
    num_layers,
    width_layers,
    batch_size,
    learning_rate,
    input_dim=num_states,
    output_dim=num_actions
)

memory = Memory(
    memory_size_max,
    memory_size_min
)

traffic_gen = TrafficGenerator(
    max_steps,
    n_cars_generated
)

visualization = Visualization(
    path,
    dpi=96
)

simulation = Simulation(
    model,
    memory,
    traffic_gen,
    sumo_cmd,
    gamma,
    max_steps,
    green_duration,
    yellow_duration,
    num_states,
    num_actions,
    training_epochs
)

timestamp_start = datetime.datetime.now()

for episode in range(total_episodes):
    print('\n----- Episode', str(episode+1), 'of', str(total_episodes))
    epsilon = 1.0 - (episode / total_episodes)  # epsilon decays linearly from 1.0 towards 0 over the session
    simulation_time, training_time = simulation.run(episode, epsilon)  # run the simulation
    print('Simulation time:', simulation_time, 's - Training time:', training_time, 's - Total:', round(simulation_time+training_time, 1), 's')

print("\n----- Start time:", timestamp_start)
print("----- End time:", datetime.datetime.now())
print("----- Session info saved at:", path)

model.save_model(path)


visualization.save_data_and_plot(data=simulation.reward_store, filename='reward', xlabel='Episode', ylabel='Cumulative negative reward')
visualization.save_data_and_plot(data=simulation.cumulative_wait_store, filename='delay', xlabel='Episode', ylabel='Cumulative delay (s)')
visualization.save_data_and_plot(data=simulation.avg_queue_length_store, filename='queue', xlabel='Episode', ylabel='Average queue length (vehicles)')

In [20]:
from model import TrainModel

ModuleNotFoundError: No module named 'model'