In [None]:
import os

# install relevant packages
INSTALL_PACKAGES = False

if INSTALL_PACKAGES:
    !pip install -q traci
    print("installed traci")
    !add-apt-repository ppa:sumo/stable -y > /dev/null
    print("add apt")
    !apt-get update -y > /dev/null
    print("apt update")
    !apt-get install -y sumo sumo-tools sumo-doc > /dev/null
    print("installed sumo")

    !pip install ipywidgets
    print("installed ipywidgets")
    
    os.environ['SUMO_HOME'] = "/usr/share/sumo/"

print("finished installing packages")

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import traci
import random
import time
from collections import deque
from tqdm import tqdm  # Import tqdm
import matplotlib.pyplot as plt
import scipy
from scipy.stats import norm
from traffic_generator import TrafficGenerator
from episodes_data import EpisodesData
from torch.nn.utils import clip_grad_norm_
import csv

# set the precision for numpy output
np.set_printoptions(precision=4)

# Seed every random number generator this notebook draws from so that runs are
# repeatable: torch, NumPy, and Python's `random` module (the training loop uses
# `random.random()` for its exploration draw, so it must be seeded as well).
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

print("finished importing packages")

In [None]:
# ---------------------------------------------------------------------------
# Simulation configuration
# ---------------------------------------------------------------------------

# SUMO input files
ROU_FILE = "./sumo_files/s2.rou.xml"
CONFIG_FILE = "./sumo_files/s2.sumocfg"
GUI_SETTINGS_FILE = "./sumo_files/viewsettings.xml"

# SUMO executables; SUMO_APP is the default one
SUMO_GUI = "sumo-gui"
SUMO_HEADLESS = "sumo"
SUMO_APP = SUMO_HEADLESS

# Timing. STEP_LENGTH and DELAY stay strings because they are placed directly
# on the SUMO command line.
STEP_LENGTH = "1"  # simulation step length in seconds
DELAY = "0"  # simulation delay in seconds, used for gui mode
TRAIN_MODEL_TIME_STEP = 4  # train the model every x seconds
TRAIN_MODEL_STEPS_SIZE = int(TRAIN_MODEL_TIME_STEP / float(STEP_LENGTH))

DEBUG = False
EPISODES_GUI = 200  # episodes divisible by this value are shown in the GUI

# State / action layout:
# 4 lanes, 10 vehicles, 5 features
# (distance, speed, waiting_time, traffic_light_phase, traffic_light_duration)
STATE_SIZE = (4, 10, 5)
MAX_STATE_SIZE = (4, 100, 5)  # buffer filled per lane before trimming to STATE_SIZE
INPUT_SIZE = np.prod(STATE_SIZE)
ACTION_SIZE = 2

# Episode budget
EPISODES = 200
N_CARS = 200
MAX_SIM_TIME = 1000
MAX_STEPS = MAX_SIM_TIME / float(STEP_LENGTH)

print("finished setting up simulation configuration")

In [None]:
# create the simulation traffic
#
# The route file is generated twice: first with a uniform distribution for half
# of N_CARS, then with a normal distribution for all N_CARS.
# NOTE(review): both runs write to the same ROU_FILE, so the second presumably
# replaces the first -- confirm against TrafficGenerator.
for n_cars, distribution in ((int(N_CARS / 2), "uniform"), (N_CARS, "normal")):
    gen = TrafficGenerator(ROU_FILE, MAX_SIM_TIME, float(STEP_LENGTH), n_cars, seed, distribution=distribution)
    gen.generate_routefile(plot_distribution=True)

In [6]:
import torch.nn.functional as F

def mish(input):
    """Mish activation: ``input * tanh(softplus(input))``."""
    gate = F.softplus(input).tanh()
    return input * gate

class Mish(nn.Module):
    """Module wrapper around ``mish`` so it can be used as a layer."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        """Apply ``mish`` to ``input`` and return the result."""
        activated = mish(input)
        return activated

# Actor module, categorical actions only
class Actor_mish(nn.Module):
    """Policy network: maps a state vector to a probability for each action.

    Args:
        state_dim: number of input features.
        n_actions: number of discrete actions (size of the output).
        activation: module class instantiated with no arguments after each
            hidden layer (defaults to ``nn.Tanh``).
    """

    def __init__(self, state_dim, n_actions, activation=nn.Tanh):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(state_dim, 256),
            activation(),
            nn.Linear(256, 128),
            activation(),
            nn.Linear(128, n_actions),
            # Normalise over the action (last) dimension explicitly. Without
            # `dim`, PyTorch warns and guesses the dimension from the input
            # rank, which picks dim 0 for 1-D and 3-D inputs.
            nn.Softmax(dim=-1)
        )

    def forward(self, X):
        """Return action probabilities; they sum to 1 along the last dimension."""
        return self.model(X)

# Critic module
class Critic_mish(nn.Module):
    """Value network: maps a state vector to a single scalar output.

    Args:
        state_dim: number of input features.
        activation: module class instantiated with no arguments after each
            hidden layer (defaults to ``nn.Tanh``).
    """

    def __init__(self, state_dim, activation=nn.Tanh):
        super().__init__()
        layers = []
        in_features = state_dim
        # two hidden layers, each followed by the chosen activation
        for width in (256, 128):
            layers.append(nn.Linear(in_features, width))
            layers.append(activation())
            in_features = width
        # single-unit output head
        layers.append(nn.Linear(in_features, 1))
        self.model = nn.Sequential(*layers)

    def forward(self, X):
        """Run ``X`` through the network and return its output."""
        value = self.model(X)
        return value

In [None]:
class Simulation:
    """One SUMO episode driven through TraCI, plus the statistics collected from it."""

    def __init__(self):
        # vehicle_id -> [distance_travelled, time_alive, accumulated_waiting_time]
        self.vehicles_info = {}
        self.arrived_vehicles = 0
        self.total_waiting_time = 0
        self.avg_speed = 0
        self.epsilon = 0.2
        self.current_green_lane = None
        self.prev_green_lane = None
        self.prev_green_lane_duration = None

    def start(self, episode):
        """Launch SUMO for ``episode`` and connect TraCI to it.

        Episode 0 ("human mode") and every episode divisible by EPISODES_GUI
        run in the GUI with a delay; all other episodes run headless.

        Raises:
            Exception: whatever ``traci.start`` raised. The error is printed and
                re-raised so the caller does not continue without a connection.
        """
        if episode == 0:  # human mode
            sumo_app, delay = SUMO_GUI, "200"
        elif episode % EPISODES_GUI == 0:
            sumo_app, delay = SUMO_GUI, "100"
        else:
            sumo_app, delay = SUMO_HEADLESS, "0"

        sumo_cmd = [sumo_app, "-c", CONFIG_FILE, "-g", GUI_SETTINGS_FILE, "--step-length", STEP_LENGTH, "--delay", delay, "--time-to-teleport", "-1", "--start", "--quit-on-end", "--keep-after-arrival", str(MAX_SIM_TIME)]
        try:
            traci.start(sumo_cmd)
        except Exception as e:
            print(f"Error starting simulation: {e}")
            try:
                # best effort: drop a half-open or stale connection if there is one
                traci.close()
            except Exception:
                pass  # nothing was connected, so there is nothing to close
            raise

    def close(self):
        """Close the TraCI connection."""
        traci.close()

    def calc_steps_to_advance(self, action_changed_phase):
        """Return how many simulation steps to run after an action.

        A phase change is given 4 seconds (3 yellow + 1 green), otherwise
        1 second; the duration is converted to steps using STEP_LENGTH. At
        least one step is always returned so the caller's loop makes progress
        even when the step length exceeds the duration.
        """
        if action_changed_phase:
            time_to_step = 4  # 3 yellow + 1 green
        else:
            time_to_step = 1

        n_steps = int(time_to_step / float(STEP_LENGTH))
        return max(1, n_steps)

    def advance(self, n_steps=1):
        """Run ``n_steps`` simulation steps and refresh the per-vehicle statistics."""
        for _ in range(n_steps):
            traci.simulationStep()
            self.arrived_vehicles += traci.simulation.getArrivedNumber()
            # store the latest info of every vehicle currently listed by TraCI
            for vehicle_id in traci.vehicle.getIDList():
                distance_travelled = traci.vehicle.getDistance(vehicle_id)
                time_alive = traci.simulation.getTime() - traci.vehicle.getDeparture(vehicle_id)
                waiting_time = traci.vehicle.getAccumulatedWaitingTime(vehicle_id)
                self.vehicles_info[vehicle_id] = [distance_travelled, time_alive, waiting_time]

    def get_state(self):
        """Build the agent's observation.

        Each vehicle that still has a traffic light ahead contributes one row
        ``[distance, speed, waiting_time, is_green, phase_duration]`` to the
        lane given by its traffic-light link index. Rows are ordered per lane
        by distance (largest first), trimmed to STATE_SIZE[1] rows and
        flattened.

        Returns:
            A float32 tensor of shape ``(1, N)`` with ``N`` the flattened size.
        """
        max_vehicles = MAX_STATE_SIZE[1]
        lane_counter = [0] * MAX_STATE_SIZE[0]
        state = np.zeros(MAX_STATE_SIZE)
        for vehicle_id in traci.vehicle.getIDList():
            upcoming_tls = traci.vehicle.getNextTLS(vehicle_id)
            if len(upcoming_tls) == 0:
                continue
            tl_id, tl_index, distance, tl_state = upcoming_tls[0]
            slot = lane_counter[tl_index]
            if slot >= max_vehicles:
                # the lane buffer is full; extra vehicles cannot be stored
                continue
            speed = traci.vehicle.getSpeed(vehicle_id)
            waiting_time = traci.vehicle.getAccumulatedWaitingTime(vehicle_id)
            state[tl_index][slot] = [distance, speed, waiting_time, 0, 0]
            lane_counter[tl_index] += 1

        tl_id = traci.trafficlight.getIDList()[0]
        current_phase = traci.trafficlight.getPhase(tl_id)
        phase_duration = traci.trafficlight.getSpentDuration(tl_id)
        if (current_phase == 0 or current_phase == 3): # 0 has green or about to turn green
            state[[0,2],:,3:] = [1, phase_duration]
            state[[1,3],:,3:] = [0, phase_duration]
        else:
            state[[0,2],:,3:] = [0, phase_duration]
            state[[1,3],:,3:] = [1, phase_duration]

        # Sort each lane's rows by distance (descending) while keeping every
        # vehicle's features together. Sorting the array directly along axis 1
        # would sort each feature column on its own and mix values of
        # different vehicles. A stable sort keeps real vehicles ahead of the
        # all-zero padding rows on ties.
        # NOTE(review): descending order keeps the farthest vehicles when a lane
        # holds more than STATE_SIZE[1]; confirm nearest-first is not intended.
        order = np.argsort(-state[:, :, 0], axis=1, kind="stable")
        state = np.take_along_axis(state, order[:, :, np.newaxis], axis=1)
        state = np.round(state, 1)

        # take only the first vehicles and flatten the state STATE_SIZE -> INPUT_SIZE
        state = state[:, :STATE_SIZE[1], :]
        state = state.flatten()
        return torch.tensor(state, dtype=torch.float32).unsqueeze(0)

    def calc_reward(self, reward_policy="average_speed", reward_alpha=0.5):
        """Compute the reward for the current simulation state.

        Args:
            reward_policy: "waiting_time" (negative mean accumulated waiting
                time of vehicles that still have a traffic light ahead),
                "average_speed" (mean speed of all vehicles) or "mixed".
            reward_alpha: weight of the speed term in the "mixed" policy; the
                waiting-time term is weighted by ``1 - reward_alpha``.

        Returns:
            The reward as a number; means over an empty set count as 0.

        Raises:
            ValueError: if ``reward_policy`` is not one of the three names.
        """
        car_speeds = []
        waiting_times = []
        for vehicle_id in traci.vehicle.getIDList():
            car_speeds.append(traci.vehicle.getSpeed(vehicle_id))
            if traci.vehicle.getNextTLS(vehicle_id):
                waiting_times.append(traci.vehicle.getAccumulatedWaitingTime(vehicle_id))

        avg_speed = sum(car_speeds) / len(car_speeds) if car_speeds else 0
        avg_waiting_time = sum(waiting_times) / len(waiting_times) if waiting_times else 0

        # calculate the reward based on the reward policy
        if reward_policy == "waiting_time":
            return -avg_waiting_time
        if reward_policy == "average_speed":
            return avg_speed
        if reward_policy == "mixed":
            return (reward_alpha * avg_speed) - ((1 - reward_alpha) * avg_waiting_time)
        raise ValueError(f"unknown reward policy: {reward_policy!r}")

    def perform_action(self, action):
        """Apply ``action`` (the lane group, 0 or 1, that should be green).

        Phases 0 and 3 count as "lane 0 green or about to turn green"; any
        other phase counts as lane 1. When the requested lane differs from the
        current one, the matching yellow phase is set (1 when leaving lane 0,
        3 when leaving lane 1); otherwise the current phase is set again,
        which resets its duration.

        Returns:
            True if the action switched the green lane, False otherwise.
        """
        tl_id = traci.trafficlight.getIDList()[0]
        current_phase = traci.trafficlight.getPhase(tl_id)
        if (current_phase == 0 or current_phase == 3): # 0 has green or about to turn green
            current_green_lane = 0
        else:
            current_green_lane = 1

        switch_green_lane = (current_green_lane != action)

        if switch_green_lane:
            if current_green_lane == 0:
                new_phase = 1 # turn to yellow for lane 1
            else:
                new_phase = 3 # turn to yellow for lane 0
        else:
            new_phase = current_phase

        self.prev_green_lane = current_green_lane
        self.prev_green_lane_duration = traci.trafficlight.getSpentDuration(tl_id)
        self.current_green_lane = action

        traci.trafficlight.setPhase(tl_id, new_phase) # set again the same phase (to reset the duration)
        return switch_green_lane

    def print_results(self, episode):
        """Print the episode summary and store waiting time and average speed.

        The average speed is the mean of ``distance / time_alive`` over every
        tracked vehicle with a positive ``time_alive``. Must be called while
        the TraCI connection is still open because it reads the simulation
        time.
        """
        total_vehicles = self.arrived_vehicles
        total_sim_time = round(traci.simulation.getTime(), 1)
        if total_sim_time > 0:
            vehicles_per_minute = round(total_vehicles / total_sim_time * 60, 2)
        else:
            vehicles_per_minute = 0
        total_waiting_time = sum(info[2] for info in self.vehicles_info.values())

        # Average speed of each car (distance / time_alive). Vehicles without a
        # positive time alive are skipped to avoid dividing by zero, and the
        # mean uses the number of vehicles actually summed.
        per_vehicle_speeds = [info[0] / info[1] for info in self.vehicles_info.values() if info[1] > 0]
        avg_speed = sum(per_vehicle_speeds) / len(per_vehicle_speeds) if per_vehicle_speeds else 0

        # store the results for plotting
        self.total_waiting_time = total_waiting_time
        self.avg_speed = avg_speed

        print(f"Episode {episode}: {total_vehicles} vehicles, {total_sim_time} seconds ({vehicles_per_minute} vehicles/min), total waiting time: {total_waiting_time}, average speed: {avg_speed:.2f}")

print("finished defining the simulation class")

In [None]:
# Build the actor and critic networks with the Mish activation; `actor` and
# `critic` are the names the training and testing cells use.
actor = actor_mish = Actor_mish(INPUT_SIZE, ACTION_SIZE, activation=Mish)
critic = critic_mish = Critic_mish(INPUT_SIZE, activation=Mish)

# define hyperparameters
gamma = 0.99 # discount factor
exploration_epsilon = 0.2
clipping_epsilon = 0.1
loss_fn = nn.MSELoss() # loss function

# one Adam optimizer per network, each with its own learning rate
actor_adam_optimizer = optim.Adam(actor.parameters(), lr=1e-5) # optimizer for the actor
critic_adam_optimizer = optim.Adam(critic.parameters(), lr=1e-4) # optimizer for the critic

print("finished creating the agent")

In [9]:
# weight of the entropy bonus in the policy loss (identifier spelling kept as-is)
entopry_coef = 0.1

def policy_loss(old_log_prob, log_prob, entropy, advantage, eps):
    """Clipped-surrogate policy loss with an entropy bonus.

    Args:
        old_log_prob: log-probability used as the reference for the ratio.
        log_prob: log-probability under the policy being optimised.
        entropy: entropy term(s) of the policy distribution.
        advantage: advantage estimate(s) weighting the ratio.
        eps: clipping range; the ratio is clamped to ``[1 - eps, 1 + eps]``.

    Returns:
        A scalar tensor: the negated mean of the element-wise minimum of the
        clipped and unclipped surrogate, minus the mean weighted entropy.
        Reducing to a scalar lets ``backward()`` be called on it for inputs
        with more than one element.
    """
    ratio = (log_prob - old_log_prob).exp()
    unclipped = ratio * advantage
    clipped = torch.clamp(ratio, 1 - eps, 1 + eps) * advantage

    surrogate = torch.min(unclipped, clipped).mean()
    entropy_bonus = (entopry_coef * entropy).mean()
    return -(surrogate + entropy_bonus)

import torch.nn.functional as F

In [None]:
# %%script echo "Skip the training loop"

# actor.load_state_dict(torch.load("model_checkpoints/actor_weights_100.pth"))
# critic.load_state_dict(torch.load("model_checkpoints/critic_weights_100.pth"))

# Checkpoints are written every 10 episodes; make sure the directory exists
# (`os` is imported in the first cell of the notebook).
os.makedirs("model_checkpoints", exist_ok=True)

episodes_data = EpisodesData()

# Every 10th episode the actor's action probabilities are logged to this CSV.
# The `with` block guarantees the file is closed even if training is interrupted.
with open('probs_history.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['episode', 'action', 'step', 'prob0', 'prob1'])

    for episode in tqdm(range(1, EPISODES + 1), desc="Training Episodes"):
        tqdm.write(f"Starting episode {episode}")

        simulation = Simulation()
        simulation.start(episode)
        try:
            # None marks "no previous step yet"; a tensor's truth value is not
            # a reliable sentinel because a log-probability of exactly 0 is falsy.
            prev_prob_act_log = None
            action = 0
            state = np.zeros(STATE_SIZE).flatten()
            state = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
            DEBUG = True if episode % EPISODES_GUI == 0 else False

            step = 0
            while traci.simulation.getMinExpectedNumber() > 0 and step < MAX_STEPS:

                # only choose an action / train while there are cars in the simulation
                any_cars_in_simulation = traci.vehicle.getIDCount()
                training_step = any_cars_in_simulation

                if (training_step):
                    random_action = random.random() < exploration_epsilon
                    if random_action: # exploration move
                        action = torch.randint(0, ACTION_SIZE, (1,))
                        probs = torch.ones(ACTION_SIZE) / ACTION_SIZE
                        # requires_grad keeps actor_loss.backward() valid on this
                        # branch even though it is not connected to the actor
                        prob_act_log = torch.log(torch.tensor(1.0 / ACTION_SIZE, requires_grad=True))
                        # entropy of a uniform distribution over ACTION_SIZE actions
                        entropy = torch.log(torch.tensor(float(ACTION_SIZE)))
                    else: # use the actor to predict the action
                        probs = actor(state)
                        dist = torch.distributions.Categorical(probs = probs)
                        action = dist.sample()
                        prob_act_log = dist.log_prob(action)
                        entropy = dist.entropy()
                        if episode % 10 == 0:
                            probs_list = probs.detach().numpy()[0]
                            writer.writerow([episode, action.item(), step, probs_list[0], probs_list[1]])

                    action = action.item()

                # advance the simulation
                action_changed_phase = simulation.perform_action(action)
                n_steps = simulation.calc_steps_to_advance(action_changed_phase)
                simulation.advance(n_steps)
                step += n_steps

                if (training_step):
                    # get the next state and calculate the reward
                    next_state = simulation.get_state()
                    reward = simulation.calc_reward()
                    critic_state = critic(state)
                    critic_next_state = critic(next_state)
                    advantage = reward + (gamma * critic_next_state - critic_state)
                    state = next_state

                    if prev_prob_act_log is not None:
                        # train the actor
                        # NOTE(review): the "old" log-probability comes from the
                        # previous step's action, not from an older policy
                        # evaluated on this step -- confirm this is intended.
                        actor_loss = policy_loss(prev_prob_act_log.detach(), prob_act_log, entropy, advantage.detach(), clipping_epsilon)
                        actor_adam_optimizer.zero_grad()
                        actor_loss.backward()
                        actor_adam_optimizer.step()

                        # train the critic
                        critic_loss = advantage.abs().mean()
                        critic_adam_optimizer.zero_grad()
                        critic_loss.backward()
                        critic_adam_optimizer.step()

                    prev_prob_act_log = prob_act_log

            print(f"Episode {episode} finished\n")

            if (episode % 10 == 0):
                torch.save(actor.state_dict(), f"model_checkpoints/actor_weights_{episode}_mixed.pth")
                torch.save(critic.state_dict(), f"model_checkpoints/critic_weights_{episode}_mixed.pth")

            # print_results reads the simulation time, so it must run before close()
            simulation.print_results(episode)
            episodes_data.add_episode_data(simulation)
        finally:
            # always release the TraCI connection, otherwise the next start fails
            simulation.close()

episodes_data.plot_training_results()

In [None]:
%%script echo "Skip the testing loop"

episodes_data = EpisodesData()

# Load the model in evaluation mode
actor.load_state_dict(torch.load("model_checkpoints/actor_weights_40.pth"))
actor.eval()

SUMO_APP = SUMO_GUI # SUMO_GUI or SUMO_HEADLESS
DELAY = "1000"
SUMO_CMD = [SUMO_APP, "-c", CONFIG_FILE, "-g", GUI_SETTINGS_FILE, "--step-length", STEP_LENGTH ,"--delay", DELAY, "--start", "--quit-on-end"]

simulation = Simulation()
simulation.start(0)

state = np.zeros(STATE_SIZE).flatten()
state = torch.tensor(state ,dtype=torch.float32).unsqueeze(0)

step = 0
while traci.simulation.getMinExpectedNumber() > 0 and step < MAX_STEPS:

    if (step % TRAIN_MODEL_STEPS_SIZE == 0):
        with torch.no_grad():
            # Use the actor to predict action probabilities
            probs = actor(state)
            dist = torch.distributions.Categorical(probs = probs)
            action = dist.sample()
            prob_act_log = dist.log_prob(action)
            action = action.item()

            
            # perform the action
            simulation.perform_action(action)
    
    # advance the simulation
    traci.simulationStep()
    next_state = simulation.get_state()

    if (step % TRAIN_MODEL_STEPS_SIZE == 0):
        state = next_state

    step += 1

simulation.print_results(episode)
episodes_data.add_episode_data(simulation)
simulation.close()

In [None]:
%%script echo "Skip the simulation without the model"

# run the simulation without the model

simulation = Simulation()
simulation.start(0)

step = 0
while traci.simulation.getMinExpectedNumber() > 0 and step < MAX_STEPS:
    simulation.advance(1)
    step += 1

simulation.print_results(0)
simulation.close()

In [None]:
%%script echo "Skip the human controlled simulation"

import threading
import ipywidgets as widgets
from IPython.display import display

user_input = threading.Event()
stop_input_thread = False

def wait_for_input():
    global stop_input_thread
    while not stop_input_thread:
        input("Press Enter to switch the lights phase")
        user_input.set()

# Start a separate thread to handle user input
human_control_thread = threading.Thread(target=wait_for_input, daemon=True)
human_control_thread.start()

Simulation = Simulation()
Simulation.start(0)

# add arrow image to the simulation
img_id = "arrow_img"
traci.poi.add(img_id, 50, 50, (255, 0, 0, 255), layer=340, width=20, height=20, imgFile="right_left.png")

tl_id = traci.trafficlight.getIDList()[0]
step = 0
while traci.simulation.getMinExpectedNumber() > 0 and step < MAX_STEPS:
    Simulation.advance(1)
    step += 1
    if user_input.is_set():
        # switch green lane and update the arrow image
        current_phase = traci.trafficlight.getPhase(tl_id)
        next_phases = (current_phase + 1) % 4
        traci.trafficlight.setPhase(tl_id, next_phases)
        img_file = "up_down.png" if next_phases == 0 or next_phases == 3 else "right_left.png"
        traci.poi.remove(img_id)
        traci.poi.add(img_id, 50, 50, (255, 0, 0, 255), layer=340, width=20, height=20, imgFile=img_file)
        user_input.clear()
           
Simulation.print_results(0)
Simulation.close()

# Stop the input thread
stop_input_thread = True
human_control_thread.join()