## Install Envirenment

In [3]:
!sudo add-apt-repository ppa:sumo/stable -y
!sudo apt-get update -y
!sudo apt-get install sumo sumo-tools sumo-doc -y

Get:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]
Hit:2 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease
Hit:3 http://archive.ubuntu.com/ubuntu bionic InRelease
Ign:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Hit:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
Hit:6 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Get:7 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Get:8 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Hit:9 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease
Hit:11 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease
Get:12 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [83.3 kB]
Hit:13 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease
Get:14 http://archive.ubuntu.com/ubuntu b

In [4]:
!pip install traci

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting traci
  Downloading traci-1.15.0-py3-none-any.whl (262 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m262.7/262.7 KB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sumolib>=1.15.0
  Downloading sumolib-1.15.0-py3-none-any.whl (144 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.5/144.5 KB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sumolib, traci
Successfully installed sumolib-1.15.0 traci-1.15.0


In [1]:
import numpy as np
import datetime
import math
import random
from sumolib import checkBinary
import matplotlib.pyplot as plt
import os
import sys
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'  # kill warning about tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model
#os.environ['SUMO_HOME'] = "/usr/share/sumo/"

In [151]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  0


In [44]:
class TrafficGenerator:
    def __init__(self, max_steps, n_cars_generated):
        self._n_cars_generated = n_cars_generated  # how many cars per episode
        self._max_steps = max_steps

    def generate_routefile(self, seed):
        np.random.seed(seed)  # make tests reproducible
        # the generation of cars is distributed according to a weibull distribution
        timings = np.random.weibull(2, self._n_cars_generated)
        timings = np.sort(timings)

        # reshape the distribution to fit the interval 0:max_steps
        car_gen_steps = []
        min_old = math.floor(timings[1])
        max_old = math.ceil(timings[-1])
        min_new = 0
        max_new = self._max_steps
        for value in timings:
            car_gen_steps = np.append(car_gen_steps, ((max_new - min_new) / (max_old - min_old)) * (value - max_old) + max_new)

        car_gen_steps = np.rint(car_gen_steps)  # round every value to int -> effective steps when a car will be generated

        # produce the file for cars generation, one car per line
        with open("map.rou.xml", "w") as routes:
            print("""<routes>
            <vType accel="1.0" decel="4.5" id="standard_car" length="5.0" minGap="2.5" maxSpeed="40" sigma="0.5" />

            <route id="D_F" edges="D2I D2 I2F"/>#F=FSSM/AF=Allal lfasi/D=bab dkala/G=gueliz
            <route id="D_AF_2" edges="D2I D2 2AF I2AF"/>
            <route id="D_AF" edges="D2I D2AF_2 I2AF"/>
            <route id="D_G" edges="D2I D2 I2G"/>
            <route id="F_D" edges="F2I I2D"/>
            <route id="F_AF" edges="F2I 2AF I2AF"/>
            <route id="F_G" edges="F2I I2G"/>
            <route id="AF_F" edges="AF2I I2F"/>
            <route id="AF_G" edges="AF2I I2G"/>
            <route id="AF_D" edges="AF2I I2D"/>
            <route id="G_D" edges="G2I I2D"/>
            <route id="G_AF" edges="G2I 2AF I2AF"/>
            <route id="G_F" edges="G2I I2F"/>""", file=routes)

            for car_counter, step in enumerate(car_gen_steps):
                straight_or_turn = np.random.uniform()
                if straight_or_turn < 0.75:  # choose direction: straight or turn - 75% of times the car goes straight
                    route_straight = np.random.randint(1, 5)  # choose a random source & destination
                    if route_straight == 1:
                        print('    <vehicle id="D_F_%i" type="standard_car" route="D_F" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_straight == 2:
                        print('    <vehicle id="F_D_%i" type="standard_car" route="F_D" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_straight == 3:
                        print('    <vehicle id="AF_G_%i" type="standard_car" route="AF_G" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    else:
                        print('    <vehicle id="G_AF_%i" type="standard_car" route="G_AF" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                else:  # car that turn -25% of the time the car turns
                    route_turn = np.random.randint(1, 9)  # choose random source source & destination
                    if route_turn == 1:
                        if(np.random.uniform()>0.9):
                            print('    <vehicle id="D_AF_2_%i" type="standard_car" route="D_AF_2" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                        else:
                            print('    <vehicle id="D_AF_%i" type="standard_car" route="D_AF" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 2:
                        print('    <vehicle id="D_G_%i" type="standard_car" route="D_G" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 3:
                        print('    <vehicle id="F_AF_%i" type="standard_car" route="F_AF" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 4:
                        print('    <vehicle id="F_G_%i" type="standard_car" route="F_G" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 5:
                        print('    <vehicle id="G_D_%i" type="standard_car" route="G_D" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 6:
                        print('    <vehicle id="G_F_%i" type="standard_car" route="G_F" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 7:
                        print('    <vehicle id="AF_F_%i" type="standard_car" route="AF_F" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 8:
                        print('    <vehicle id="AF_D_%i" type="standard_car" route="AF_D" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
            print("</routes>", file=routes)
            
    def generate_sinario(self, seed,sinario):
        np.random.seed(seed)  # make tests reproducible
        # the generation of cars is distributed according to a weibull distribution
        timings = np.random.weibull(2, self._n_cars_generated)
        timings = np.sort(timings)

        # reshape the distribution to fit the interval 0:max_steps
        car_gen_steps = []
        min_old = math.floor(timings[1])
        max_old = math.ceil(timings[-1])
        min_new = 0
        max_new = self._max_steps
        for value in timings:
            car_gen_steps = np.append(car_gen_steps, ((max_new - min_new) / (max_old - min_old)) * (value - max_old) + max_new)

        car_gen_steps = np.rint(car_gen_steps)  # round every value to int -> effective steps when a car will be generated

        # produce the file for cars generation, one car per line
        with open("map.rou.xml", "w") as routes:
            print("""<routes>
            <vType accel="1.0" decel="4.5" id="standard_car" length="5.0" minGap="2.5" maxSpeed="40" sigma="0.5" />

            <route id="D_F" edges="D2I D2 I2F"/>#F=FSSM/AF=Allal lfasi/D=bab dkala/G=gueliz
            <route id="D_AF_2" edges="D2I D2 2AF I2AF"/>
            <route id="D_AF" edges="D2I D2AF_2 I2AF"/>
            <route id="D_G" edges="D2I D2 I2G"/>
            <route id="F_D" edges="F2I I2D"/>
            <route id="F_AF" edges="F2I 2AF I2AF"/>
            <route id="F_G" edges="F2I I2G"/>
            <route id="AF_F" edges="AF2I I2F"/>
            <route id="AF_G" edges="AF2I I2G"/>
            <route id="AF_D" edges="AF2I I2D"/>
            <route id="G_D" edges="G2I I2D"/>
            <route id="G_AF" edges="G2I 2AF I2AF"/>
            <route id="G_F" edges="G2I I2F"/>""", file=routes)

            for car_counter, step in enumerate(car_gen_steps):
                straight_or_turn = np.random.uniform()
                if straight_or_turn < 0.85:  # choose direction: straight or turn - 75% of times the car goes straight
                    route_straight = np.random.randint(1, 3)  # choose a random source & destination
                    if(sinario==1):
        
                        if route_straight == 1:
                            print('    <vehicle id="D_F_%i" type="standard_car" route="D_F" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                        elif route_straight == 2:
                            print('    <vehicle id="F_D_%i" type="standard_car" route="F_D" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    if(sinario==2):
                        if route_straight == 1:
                            print('    <vehicle id="AF_G_%i" type="standard_car" route="AF_G" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                        elif route_straight == 2:
                            print('    <vehicle id="G_AF_%i" type="standard_car" route="G_AF" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                else:  # car that turn -25% of the time the car turns
                    route_turn = np.random.randint(1, 11)  # choose random source source & destination
                    if route_turn == 1:
                        if(np.random.uniform()>0.9):
                            print('    <vehicle id="D_AF_2_%i" type="standard_car" route="D_AF_2" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                        else:
                            print('    <vehicle id="D_AF_%i" type="standard_car" route="D_AF" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 2:
                        print('    <vehicle id="D_G_%i" type="standard_car" route="D_G" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 3:
                        print('    <vehicle id="F_AF_%i" type="standard_car" route="F_AF" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 4:
                        print('    <vehicle id="F_G_%i" type="standard_car" route="F_G" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 5:
                        print('    <vehicle id="G_D_%i" type="standard_car" route="G_D" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 6:
                        print('    <vehicle id="G_F_%i" type="standard_car" route="G_F" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 7:
                        print('    <vehicle id="AF_F_%i" type="standard_car" route="AF_F" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    elif route_turn == 8:
                        print('    <vehicle id="AF_D_%i" type="standard_car" route="AF_D" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    if(sinario==1):
                        if route_turn == 9:
                            print('    <vehicle id="AF_G_%i" type="standard_car" route="AF_G" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                        else:
                            print('    <vehicle id="G_AF_%i" type="standard_car" route="G_AF" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                    if(sinario==2):
                        if route_turn == 9:
                            print('    <vehicle id="D_F_%i" type="standard_car" route="D_F" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
                        else:
                            print('    <vehicle id="F_D_%i" type="standard_car" route="F_D" depart="%s" departLane="random" departSpeed="30" />' % (car_counter, step), file=routes)
            print("</routes>", file=routes)

In [3]:
class Memory:
    def __init__(self, size_max, size_min):
        self._samples = []
        self._size_max = size_max
        self._size_min = size_min
    def add_sample(self, sample):
        """
        Add a sample into the memory
        """
        self._samples.append(sample)
        if self._size_now() > self._size_max:
            self._samples.pop(0)  # if the length is greater than the size of memory, remove the oldest element
    def get_samples(self, n):
        """
        Get n samples randomly from the memory
        """
        if self._size_now() < self._size_min:
            return []

        if n > self._size_now():
            return random.sample(self._samples, self._size_now())  # get all the samples
        else:
            return random.sample(self._samples, n)  # get "batch size" number of samples
    def _size_now(self):
        return len(self._samples)

In [39]:
import traci
import numpy as np
import random
import timeit
import os

# phase codes based on map.net.xml
PHASE_DandF_GREEN = 0  # action 0 code 00
PHASE_DandF_YELLOW = 1
PHASE_GandAF_GREEN = 2  # action 1 code 01
PHASE_GandAF_YELLOW = 3

class Simulation:
    def __init__(self, Model,TrafficGen,sumo_cmd, max_steps, green_duration, yellow_duration, num_states, num_actions, Memory="None",  gamma="None", training_epochs="None",job="train",sinario=0):
        if job=="train":
            self._gamma = gamma
            self._Memory = Memory
            self._reward_store = []
            self._cumulative_wait_store = []
            self._avg_queue_length_store = []
            self._training_epochs = training_epochs
        elif job=="test":
            self._sinario=sinario
            self._reward_episode = []
            self._queue_length_episode = []


        self._Model = Model
        self._TrafficGen = TrafficGen
        self._step = 0
        self._sumo_cmd = sumo_cmd
        self._max_steps = max_steps
        self._green_duration = green_duration
        self._yellow_duration = yellow_duration
        self._num_states = num_states
        self._num_actions = num_actions
        self._job=job

    def run(self, episode, epsilon=0):
        start_time = timeit.default_timer()

        if(self._job=="train"):
            self._TrafficGen.generate_routefile(seed=episode)
            traci.start(self._sumo_cmd)
            print("Simulating...")

            # inits
            self._step = 0
            self._waiting_times = {}
            self._sum_neg_reward = 0
            self._sum_queue_length = 0
            self._sum_waiting_time = 0
            old_total_wait = 0
            old_state = -1
            old_action = -1

            while self._step < self._max_steps:

                # get current state of the intersection
                current_state = self._get_state()

                # calculate reward of previous action: (change in cumulative waiting time between actions)
                # waiting time = seconds waited by a car since the spawn in the environment, cumulated for every car in incoming lanes
                current_total_wait = self._collect_waiting_times()
                reward = old_total_wait - current_total_wait

                # saving the data into the memory
                if self._step != 0:
                    self._Memory.add_sample((old_state, old_action, reward, current_state))

                # choose the light phase to activate, based on the current state of the intersection
                action = self._choose_action(current_state, epsilon)

                # if the chosen phase is different from the last phase, activate the yellow phase
                if self._step != 0 and old_action != action:
                    self._set_yellow_phase(old_action)
                    self._simulate(self._yellow_duration)

                # execute the phase selected before
                self._set_green_phase(action)
                self._simulate(self._green_duration)

                # saving variables for later & accumulate reward
                old_state = current_state
                old_action = action
                old_total_wait = current_total_wait

                # saving only the meaningful reward to better see if the agent is behaving correctly
                if reward < 0:
                    self._sum_neg_reward += reward

            self._save_episode_stats()
            print("Total reward:", self._sum_neg_reward, "- Epsilon:", round(epsilon, 2))
            traci.close()
            simulation_time = round(timeit.default_timer() - start_time, 1)

            print("Training...")
            start_time = timeit.default_timer()
            for _ in range(self._training_epochs):
                self._replay(episode)
            training_time = round(timeit.default_timer() - start_time, 1)

            return simulation_time, training_time

        elif self._job=="test":
            # first, generate the route file for this simulation and set up sumo
            if self._sinario==0:
                self._TrafficGen.generate_routefile(seed=episode)
            else:
                self._TrafficGen.generate_sinario(seed=episode,sinario=self._sinario)
            traci.start(self._sumo_cmd)
            print("Simulating...")

            # inits
            self._step = 0
            self._waiting_times = {}
            old_total_wait = 0
            old_action = -1 # dummy init
            while self._step < self._max_steps:

                # get current state of the intersection
                current_state = self._get_state()

                # calculate reward of previous action: (change in cumulative waiting time between actions)
                # waiting time = seconds waited by a car since the spawn in the environment, cumulated for every car in incoming lanes
                current_total_wait = self._collect_waiting_times()
                reward = old_total_wait - current_total_wait

                # choose the light phase to activate, based on the current state of the intersection
                action = self._choose_action(current_state)

                # if the chosen phase is different from the last phase, activate the yellow phase
                if self._step != 0 and old_action != action:
                    self._set_yellow_phase(old_action)
                    self._simulate(self._yellow_duration)

                # execute the phase selected before
                self._set_green_phase(action)
                self._simulate(self._green_duration)

                # saving variables for later & accumulate reward
                old_action = action
                old_total_wait = current_total_wait
                if reward < 0:
                    self._reward_episode.append(reward)
                

            #print("Total reward:", np.sum(self._reward_episode))
            traci.close()
            simulation_time = round(timeit.default_timer() - start_time, 1)

            return np.sum(self._reward_episode),np.sum(self._queue_length_episode),np.sum(self._queue_length_episode)/self._max_steps

    def _simulate(self, steps_todo):
        if (self._step + steps_todo) >= self._max_steps:  # do not do more steps than the maximum allowed number of steps
            steps_todo = self._max_steps - self._step
            
        while steps_todo > 0:
            traci.simulationStep()  # simulate 1 step in sumo
            self._step += 1 # update the step counter
            steps_todo -= 1
            queue_length = self._get_queue_length()
            if self._job=="train":
                self._sum_queue_length += queue_length
                self._sum_waiting_time += queue_length # 1 step while wating in queue means 1 second waited, for each car, therefore queue_lenght == waited_seconds
            elif self._job=="test":
                self._queue_length_episode.append(queue_length)
    def _collect_waiting_times(self):
        incoming_roads = ["D2I","D2", "F2I", "AF2I", "G2I"]
        car_list = traci.vehicle.getIDList()
        for car_id in car_list:
            wait_time = traci.vehicle.getAccumulatedWaitingTime(car_id)
            road_id = traci.vehicle.getRoadID(car_id)  # get the road id where the car is located
            if road_id in incoming_roads:  # consider only the waiting times of cars in incoming roads
                self._waiting_times[car_id] = wait_time
            else:
                if car_id in self._waiting_times: # a car that was tracked has cleared the intersection
                    del self._waiting_times[car_id] 
        total_waiting_time = sum(self._waiting_times.values())
        return total_waiting_time


    def _choose_action(self, state, epsilon=0):
        if random.random() < epsilon:
            return random.randint(0, self._num_actions - 1) # random action
        else:
            return np.argmax(self._Model.predict_one(state)) # the best action given the current state


    def _set_yellow_phase(self, old_action):

        yellow_phase_code = old_action*2 + 1 # obtain the yellow phase code, based on the old action (ref on environment.net.xml)
        traci.trafficlight.setPhase("INT", yellow_phase_code)


    def _set_green_phase(self, action_number):
        if action_number == 0:
            traci.trafficlight.setPhase("INT", PHASE_DandF_GREEN)
        elif action_number == 1:
            traci.trafficlight.setPhase("INT", PHASE_GandAF_GREEN)

    def _get_queue_length(self):
        halt_D = traci.edge.getLastStepHaltingNumber("D2I")
        halt_D =halt_D + traci.edge.getLastStepHaltingNumber("D2")
        halt_F = traci.edge.getLastStepHaltingNumber("F2I")
        halt_AF = traci.edge.getLastStepHaltingNumber("AF2I")
        halt_G = traci.edge.getLastStepHaltingNumber("G2I")
        queue_length = halt_D + halt_F + halt_AF + halt_G
        return queue_length


    def _get_state(self):
        state = np.zeros(self._num_states)
        car_list = traci.vehicle.getIDList()

        for car_id in car_list:
            lane_pos = traci.vehicle.getLanePosition(car_id)
            lane_id = traci.vehicle.getLaneID(car_id)
            lane_pos = 500- lane_pos  # inversion of lane pos, so if the car is close to the traffic light -> lane_pos = 0 --- 750 = max len of a road

            # distance in meters from the traffic light -> mapping into cells
            if lane_pos < 7:
                lane_cell = 0
            elif lane_pos < 14:
                lane_cell = 1
            elif lane_pos < 21:
                lane_cell = 2
            elif lane_pos < 28:
                lane_cell = 3
            elif lane_pos < 40:
                lane_cell = 4
            elif lane_pos < 60:
                lane_cell = 5
            elif lane_pos < 100:
                lane_cell = 6
            elif lane_pos < 160:
                lane_cell = 7
            elif lane_pos < 300:
                lane_cell = 8
            elif lane_pos < 500:
                lane_cell = 9
            

            # finding the lane where the car is located 

            if lane_id == "D2I_0" or lane_id == "D2I_1" or lane_id == "D2_0" or lane_id == "D2_1":
                lane_group = 0
            elif lane_id == "F2I_1" or lane_id == "F2I_0":
                lane_group = 1
            elif lane_id == "AF2I_1" or lane_id == "AF2I_0":
                lane_group = 2
            elif lane_id == "G2I_1" or lane_id == "G2I_0":
                lane_group = 3
            else:
                lane_group = -1

            if lane_group >= 1 and lane_group <= 7:
                car_position = int(str(lane_group) + str(lane_cell))  # composition of the two postion ID to create a number in interval 0-79
                valid_car = True
            elif lane_group == 0:
                car_position = lane_cell
                valid_car = True
            else:
                valid_car = False  # flag for not detecting cars crossing the intersection or driving away from it

            if valid_car:
                state[car_position] = 1  # write the position of the car car_id in the state array in the form of "cell occupied"
        return state


    def _replay(self,episode):
        if self._job=="train":
            batch = self._Memory.get_samples(self._Model.batch_size)

            if len(batch) > 0:  # if the memory is full enough
                states = np.array([val[0] for val in batch])  # extract states from the batch
                next_states = np.array([val[3] for val in batch])  # extract next states from the batch

                # prediction
                if episode>0:
                    q_s_a = self._Model.predict_batch(states)  # predict Q(state), for every sample
                    q_s_a_d = self._Model.predict_batch(next_states)  # predict Q(next_state), for every sample
                else:
                    q_s_a = self._Model.predict_batch(states)  # predict Q(state), for every sample
                    q_s_a_d = self._Model.predict_batch(next_states)  # predict Q(next_state), for every sample

                # setup training arrays
                x = np.zeros((len(batch), self._num_states))
                y = np.zeros((len(batch), self._num_actions))

                for i, b in enumerate(batch):
                    state, action, reward, _ = b[0], b[1], b[2], b[3]  # extract data from one sample
                    current_q = q_s_a[i]  # get the Q(state) predicted before
                    current_q[action] = reward + self._gamma * np.amax(q_s_a_d[i])  # update Q(state, action)
                    x[i] = state
                    y[i] = current_q  # Q(state) that includes the updated action value
                
                self._Model.train_batch(x, y)  # train the NN


    def _save_episode_stats(self):
        if (self._job=="train"):
            self._reward_store.append(self._sum_neg_reward)  # how much negative reward in this episode
            self._cumulative_wait_store.append(self._sum_waiting_time)  # total number of seconds waited by cars in this episode
            self._avg_queue_length_store.append(self._sum_queue_length / self._max_steps)  # average number of queued cars per step, in this episode
    def get_model(self):
      return self._Model

    @property
    def reward_store(self):
        return self._reward_store


    @property
    def cumulative_wait_store(self):
        return self._cumulative_wait_store

    @property
    def reward_episode(self):
        return self._reward_episode
    @property
    def avg_queue_length_store(self):
        return self._avg_queue_length_store



In [40]:
def set_sumo(gui, max_steps):
    """
    Configure various parameters of SUMO
    """
    # sumo things - we need to import python modules from the $SUMO_HOME/tools directory
    if 'SUMO_HOME' in os.environ:
        tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
        sys.path.append(tools)
    else:
        sys.exit("please declare environment variable 'SUMO_HOME'")

    # setting the cmd mode or the visual mode    
    if gui == False:
        sumoBinary = checkBinary('sumo')
    else:
        sumoBinary = checkBinary('sumo-gui')
 
    # setting the cmd command to run sumo at simulation time
    sumo_cmd = [sumoBinary, "-c",  "sim.sumocfg", "--no-step-log", "true", "--waiting-time-memory", str(max_steps),"--no-warnings","true"]

    return sumo_cmd


In [41]:

class TrainModel:
    def __init__(self, num_layers, width, batch_size, learning_rate, input_dim, output_dim):
        self._input_dim = input_dim
        self._output_dim = output_dim
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._model = self._build_model(num_layers, width)


    def _build_model(self, num_layers, width):
        """
        Build and compile a fully connected deep neural network
        """
        inputs = keras.Input(shape=(self._input_dim,))
        x = layers.Dense(width, activation='relu')(inputs)
        for _ in range(num_layers):
            x = layers.Dense(width, activation='relu')(x)
        outputs = layers.Dense(self._output_dim, activation='linear')(x)

        model = keras.Model(inputs=inputs, outputs=outputs, name='my_model')
        model.compile(loss=losses.mean_squared_error, optimizer=Adam(learning_rate=self._learning_rate))
        return model
    

    def predict_one(self, state):
        """
        Predict the action values from a single state
        """
        state = np.reshape(state, [1, self._input_dim])
        return self._model.predict(state)


    def predict_batch(self, states):
        """
        Predict the action values from a batch of states
        """
        return self._model.predict(states)


    def train_batch(self, states, q_sa):
        """
        Train the nn using the updated q-values
        """
        self._model.fit(states, q_sa, epochs=1, verbose=0)


    def save_model(self, path):
        """
        Save the current model in the folder as h5 file and a model architecture summary as png
        """
        self._model.save(os.path.join(path, 'trained_model.h5'))
        plot_model(self._model, to_file=os.path.join(path, 'model_structure.png'), show_shapes=True, show_layer_names=True)


    @property
    def input_dim(self):
        return self._input_dim


    @property
    def output_dim(self):
        return self._output_dim


    @property
    def batch_size(self):
        return self._batch_size



In [42]:

class Visualization:
    def __init__(self, path, dpi):
            self._path = path
            self._dpi = dpi


    def save_data_and_plot(self, data, filename, xlabel, ylabel):
        """
        Produce a plot of performance of the agent over the session and save the relative data to txt
        """
        min_val = min(data)
        max_val = max(data)

        plt.rcParams.update({'font.size': 24})  # set bigger font size
 
        plt.plot(data)
        plt.ylabel(ylabel)
        plt.xlabel(xlabel)
        plt.margins(0)
        plt.ylim(min_val - 0.05 * abs(min_val), max_val + 0.05 * abs(max_val))
        fig = plt.gcf()
        fig.set_size_inches(20, 11.25)
        fig.savefig(os.path.join(self._path, 'plot_'+filename+'.png'), dpi=self._dpi)
        plt.close("all")

        with open(os.path.join(self._path, 'plot_'+filename + '_data.txt'), "w") as file:
            for value in data:
                    file.write("%s\n" % value)
    

In [36]:
class TestModel:
    def __init__(self, input_dim, model_path):
        self._input_dim = input_dim
        self._model = self._load_my_model(model_path)


    def _load_my_model(self, model_folder_path):
        """
        Load the model stored in the folder specified by the model number, if it exists
        """
        model_file_path = os.path.join(model_folder_path, 'trained_model.h5')
        
        if os.path.isfile(model_file_path):
            loaded_model = load_model(model_file_path)
            return loaded_model
        else:
            sys.exit("Model number not found")


    def predict_one(self, state):
        """
        Predict the action values from a single state
        """
        state = np.reshape(state, [1, self._input_dim])
        return self._model.predict(state)


    @property
    def input_dim(self):
        return self._input_dim

## TRAIN

In [None]:


gui = False
total_episodes = 300
max_steps = 5400
n_cars_generated = 1000
green_duration = 10
yellow_duration = 3
num_layers = 4
width_layers = 400
batch_size = 100
learning_rate = 0.001
training_epochs = 100
memory_size_min = 1000
memory_size_max = 100000
num_states = 40
num_actions = 2
gamma = 0.75
path='./'

sumo_cmd = set_sumo(gui,max_steps)

Model = TrainModel(
    num_layers, 
    width_layers, 
    batch_size, 
    learning_rate, 
    input_dim=num_states, 
    output_dim=num_actions
)

Memo = Memory(
    memory_size_max, 
    memory_size_min
)

TrafficGen = TrafficGenerator(
    max_steps, 
    n_cars_generated
)

Visualization = Visualization(
    path, 
    dpi=96
)
    
Simu = Simulation(
    Model=Model,
    Memory=Memo,
    TrafficGen=TrafficGen,
    sumo_cmd=sumo_cmd,
    gamma=gamma,
    max_steps=max_steps,
    green_duration=green_duration,
    yellow_duration=yellow_duration,
    num_states=num_states,
    num_actions=num_actions,
    training_epochs=training_epochs,
    job="train"
)

episode = 0
timestamp_start = datetime.datetime.now()

while episode < total_episodes:
    print('\n----- Episode', str(episode+1), 'of', str(total_episodes))
    epsilon = 1.0 - (episode /total_episodes)  # set the epsilon for this episode according to epsilon-greedy policy
    simulation_time, training_time = Simu.run(episode, epsilon)  # run the simulation
    print('Simulation time:', simulation_time, 's - Training time:', training_time, 's - Total:', round(simulation_time+training_time, 1), 's')
    episode += 1

print("\n----- Start time:", timestamp_start)
print("----- End time:", datetime.datetime.now())
print("----- Session info saved at:", path)

Model.save_model(path)


Visualization.save_data_and_plot(data=Simulation.reward_store, filename='reward', xlabel='Episode', ylabel='Cumulative negative reward')
Visualization.save_data_and_plot(data=Simulation.cumulative_wait_store, filename='delay', xlabel='Episode', ylabel='Cumulative delay (s)')
Visualization.save_data_and_plot(data=Simulation.avg_queue_length_store, filename='queue', xlabel='Episode', ylabel='Average queue length (vehicles)')

## TEST

In [None]:
gui = False
max_steps = 5400
n_cars_generated = 1000
green_duration = 10
yellow_duration = 3
num_states = 40
num_actions = 2
path='./'
try:
    traci.close()
except:
    pass
Model=TestModel(num_states,path)
TrafficGen = TrafficGenerator(
    max_steps, 
    n_cars_generated
)
sumo_cmd = set_sumo(gui,max_steps)



In [46]:
class sumo_classique():
    def __init__(self):
        self._reward_episode=[]
        self._waiting_times={}
        self.queue_length=0
    
    def _get_queue_length(self):
        halt_D = traci.edge.getLastStepHaltingNumber("D2I")
        halt_D =halt_D + traci.edge.getLastStepHaltingNumber("D2")
        halt_F = traci.edge.getLastStepHaltingNumber("F2I")
        halt_AF = traci.edge.getLastStepHaltingNumber("AF2I")
        halt_G = traci.edge.getLastStepHaltingNumber("G2I")
        queue_length = halt_D + halt_F + halt_AF + halt_G
        return queue_length
    def run(self,gui):
        if gui == False:
            sumoBinary = checkBinary('sumo')
        else:
            sumoBinary = checkBinary('sumo-gui')
        traci.start([sumoBinary, "-c", "sim.sumocfg"])
        old_total_wait=0
        while traci.simulation.getMinExpectedNumber() > 0:
            
            traci.simulationStep()
            self.queue_length += self._get_queue_length()
            current_total_wait = self._collect_waiting_times()
            reward = old_total_wait - current_total_wait
            if reward < 0:
                self._reward_episode.append(reward)
            old_total_wait = current_total_wait
        
        traci.close()  
        return np.sum(self._reward_episode),self.queue_length,self.queue_length/max_steps
    
    def _collect_waiting_times(self):
        incoming_roads = ["D2I","D2", "F2I", "AF2I", "G2I"]
        car_list = traci.vehicle.getIDList()
        for car_id in car_list:
            wait_time = traci.vehicle.getAccumulatedWaitingTime(car_id)
            road_id = traci.vehicle.getRoadID(car_id)  # get the road id where the car is located
            if road_id in incoming_roads:  # consider only the waiting times of cars in incoming roads
                self._waiting_times[car_id] = wait_time
            else:
                if car_id in self._waiting_times: # a car that was tracked has cleared the intersection
                    del self._waiting_times[car_id] 
        total_waiting_time = sum(self._waiting_times.values())
        return total_waiting_time
    

In [None]:
Simu = Simulation(
    Model=Model,
    TrafficGen=TrafficGen,
    sumo_cmd=sumo_cmd,
    max_steps=max_steps,
    green_duration=green_duration,
    yellow_duration=yellow_duration,
    num_states=num_states,
    num_actions=num_actions,
    job="test",
    sinario=0
)
reward,delay,average_queue=Simu.run(episode=500)
print("Cumulative negative reward : ",reward)
print("Cumulative delay : ",delay)
print("Avg queue lentgh : ",average_queue)

In [50]:
try:
    traci.close()
except:
    pass
s=sumo_classique()
reward,delay,average_queue=s.run(False)
print("Cumulative negative reward : ",reward)
print("Cumulative delay : ",delay)
print("Avg queue lentgh : ",average_queue)

Cumulative negative reward :  -21780.0
Cumulative delay :  21392
Avg queue lentgh :  3.9614814814814814


In [51]:
Simu = Simulation(
    Model=Model,
    TrafficGen=TrafficGen,
    sumo_cmd=sumo_cmd,
    max_steps=max_steps,
    green_duration=green_duration,
    yellow_duration=yellow_duration,
    num_states=num_states,
    num_actions=num_actions,
    job="test",
    sinario=1
)
reward,delay,average_queue=Simu.run(episode=500)
print("Cumulative negative reward : ",reward)
print("Cumulative delay : ",delay)
print("Avg queue lentgh : ",average_queue)

Simulating...
Cumulative negative reward :  -1653.0
Cumulative delay :  5239
Avg queue lentgh :  0.9701851851851852


In [52]:
try:
    traci.close()
except:
    pass
s=sumo_classique()
reward,delay,average_queue=s.run(False)
print("Cumulative negative reward : ",reward)
print("Cumulative delay : ",delay)
print("Avg queue lentgh : ",average_queue)

Cumulative negative reward :  -22880.0
Cumulative delay :  24516
Avg queue lentgh :  4.54


In [53]:
Simu = Simulation(
    Model=Model,
    TrafficGen=TrafficGen,
    sumo_cmd=sumo_cmd,
    max_steps=max_steps,
    green_duration=green_duration,
    yellow_duration=yellow_duration,
    num_states=num_states,
    num_actions=num_actions,
    job="test",
    sinario=2
)
reward,delay,average_queue=Simu.run(episode=500)
print("Cumulative negative reward : ",reward)
print("Cumulative delay : ",delay)
print("Avg queue lentgh : ",average_queue)

Simulating...
Cumulative negative reward :  -1596.0
Cumulative delay :  4901
Avg queue lentgh :  0.9075925925925926


In [55]:
try:
    traci.close()
except:
    pass
s=sumo_classique()
reward,delay,average_queue=s.run(False)
print("Cumulative negative reward : ",reward)
print("Cumulative delay : ",delay)
print("Avg queue lentgh : ",average_queue)


Cumulative negative reward :  -16431.0
Cumulative delay :  19433
Avg queue lentgh :  3.5987037037037037
