In [1]:
# Reload imported project modules automatically so edits to rl_package are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import traci
import sys
import argparse
import tensorflow as tf
import numpy as np
from dotenv import load_dotenv

# Import internal modules
from rl_package.rl_logic.Environnement import EnvironnementSumo
from rl_package.rl_logic.Agent import AgentSumo
from rl_package.params import *
# Echo the active configuration so the notebook output records which simulation
# and binaries were used. These names are presumably provided by the
# `rl_package.params` star import above - verify against that module.
print(BATCH_SIZE,NAME_SIMULATION,WINDOW)
print(SUMO_GUI_BIN,SUMO_BIN)
print(SIMUL_CONFIG)

32 simu_opera 20
/Users/arseneclaustre/sumo/bin/sumo-gui /Users/arseneclaustre/sumo/bin/sumo
/Users/arseneclaustre/code/psels/RL_traffic/Traffic/Opera_2/osm.sumocfg


In [3]:


# Load environment variables
#load_dotenv()


#SUMO_BIN = r"C:/Program Files/rl_project/Eclipse/Sumo/bin/sumo.exe"
# SIMUL_CONFIG = r"double_traffic/double_traffic.sumo.cfg"
#SIMUL_CONFIG = "/Users/arseneclaustre/code/psels/RL_traffic/Traffic/double_traffic/double_traffic.sumo.cfg"
#SIMUL_CONFIG = "/Users/arseneclaustre/code/psels/RL_traffic/Traffic/Opera_2/osm.sumocfg"
# SIMUL_CONFIG = "/Users/arseneclaustre/code/psels/RL_traffic/Traffic/Opera/osm.polycfg"
#BATCH_SIZE=6
# SUMO command: built inside each function below (preprocess, train_models, scenario)

In [4]:
def preprocess():
    """
    Determine the number of inputs and outputs required for each agent.

    A headless SUMO simulation is started only to inspect the traffic lights;
    it is always closed again, even when one of the environment queries fails.

    Returns:
        tuple: ``(inputs_per_agents, outputs_per_agents, positions_phases)``,
        three lists with one entry per id in ``env.trafficlights_ids``, in the
        same order:
          * inputs_per_agents  - number of controlled lanes multiplied by 3,
          * outputs_per_agents - number of phases returned by
            ``env.get_phase_without_yellow``,
          * positions_phases   - the position value returned alongside those
            phases.
    """
    sumoCmd = [SUMO_BIN, "-c", SIMUL_CONFIG, '--start', '--no-warnings']
    print(sumoCmd)
    env = EnvironnementSumo(sumoCmd, WINDOW)
    inputs_per_agents = []
    outputs_per_agents = []
    positions_phases = []

    try:
        for trafficlight in env.trafficlights_ids:
            # Three input features per controlled lane.
            # NOTE(review): the previous comment listed only "queue + vehicle
            # count"; confirm the third feature against the environment's
            # state builder.
            n_lanes = len(env.control_lanes(trafficlight))
            inputs_per_agents.append(n_lanes * 3)

            # One output per selectable phase (yellow phases are filtered out by
            # the environment helper, going by its name).
            phases, position = env.get_phase_without_yellow(trafficlight)
            positions_phases.append(position)
            outputs_per_agents.append(len(phases))
    finally:
        # Release the SUMO session even if a query above raised; otherwise the
        # simulation stays open and can block the next one.
        env.close()

    return inputs_per_agents, outputs_per_agents, positions_phases


In [5]:
def train_models(inputs_per_agents, outputs_per_agents, position_phases, type_model="DQN",
                 steps_per_episode=75, target_sync_every=5):
    """
    Train one reinforcement-learning agent per traffic light and save each model.

    Args:
        inputs_per_agents: input size of each agent's network.
        outputs_per_agents: output size of each agent's network (same order).
        position_phases: per-traffic-light phase positions, stored on every
            environment as ``env.position_phases`` before it is stepped.
        type_model: model type handed to ``AgentSumo``; it is also part of the
            saved file name. Any value other than ``'DQN'`` enables the
            periodic target-network synchronisation.
        steps_per_episode: maximum number of ``env.step`` calls per episode.
        target_sync_every: the target network is synchronised at the end of
            every episode whose index is a multiple of this value.

    The number of episodes is read from the global ``EPISODE``. Existing files
    under ``models/`` are loaded first, so training resumes from them. Nothing
    is returned; the action models are written to
    ``models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras``.
    """
    agents = [AgentSumo(type_model, inputs, outputs) for inputs, outputs in zip(inputs_per_agents, outputs_per_agents)]

    # Load pre-trained models if available
    for i, agent in enumerate(agents):
        agent.build_model()
        model_path = f"models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras"
        if os.path.exists(model_path):
            print(f"🔄 Loading pre-trained model for Agent {i} from {model_path}...")
            agent.model_action = tf.keras.models.load_model(model_path)
            if agent.model_target:  # Only agents that own a target network
                agent.model_target = tf.keras.models.load_model(model_path)

    sumoCmd = [SUMO_BIN, "-c", SIMUL_CONFIG, '--start', '--no-warnings','--scale','0.3']

    for episode in range(EPISODE):
        print(f'🔄 Episode {episode}/{EPISODE}')
        env = EnvironnementSumo(sumoCmd, WINDOW)
        try:
            # Store the position phases of the traffic lights in the environment
            env.position_phases = position_phases

            # Linear decay from 1 towards a floor of 0.01
            epsilon = max(1 - episode / EPISODE, 0.01)

            traffic_lights = env.trafficlights_ids
            states = [env.get_states_per_traffic_light(traffic_light) for traffic_light in traffic_lights]

            for _ in range(steps_per_episode):
                actions = [agent.epsilon_greedy_policy(np.array(states[i]), epsilon) for i, agent in enumerate(agents)]
                next_states, rewards = env.step(actions)

                for i, agent in enumerate(agents):
                    agent.add_to_memory(np.array(states[i]), np.array(actions[i]), np.array(rewards[i]), np.array(next_states[i]))

                states = next_states

                # Start learning once the first agent's buffer holds five
                # batches; the guard avoids an IndexError when there are no
                # agents at all.
                if agents and len(agents[0].replay_buffer) >= BATCH_SIZE * 5:
                    for agent in agents:
                        agent.training_step(BATCH_SIZE)

                if env.get_total_number_vehicles() == 0:
                    break  # Stop simulation if no vehicles left

            if episode % target_sync_every == 0 and type_model != 'DQN':
                for agent in agents:
                    agent.model_target.set_weights(agent.model_action.get_weights())
        finally:
            # Always release the SUMO session, including on errors or a manual
            # interrupt, so the next episode (or cell) can start a new one.
            env.close()

    for i, agent in enumerate(agents):
        model_path = f"models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras"
        # The output directory may not exist on a fresh checkout.
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        agent.model_action.save(model_path)
        print(f"✅ Model saved for Agent {i} at: {model_path}")


In [6]:

def load_trained_agents(inputs_per_agents, outputs_per_agents, type_model="DQN"):
    """
    Build one agent per (inputs, outputs) pair and restore its saved action model.

    Model files are looked up as
    ``models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras``. As soon as one
    file is missing, a message is printed and ``sys.exit(1)`` is called.

    Returns:
        list: the agents, in input order, each with ``model_action`` replaced
        by the model loaded from disk.
    """
    agents = []
    for n_inputs, n_outputs in zip(inputs_per_agents, outputs_per_agents):
        agents.append(AgentSumo(type_model, n_inputs, n_outputs))

    for i, agent in enumerate(agents):
        model_path = f"models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras"
        if not os.path.exists(model_path):
            # A missing file aborts the whole load.
            print(f"❌ No pre-trained model found for Agent {i}.")
            sys.exit(1)
        else:
            print(f"🔄 Loading pre-trained model for Agent {i} from {model_path}...")
            # Build first, then swap in the weights stored on disk.
            agent.build_model()
            agent.model_action = tf.keras.models.load_model(model_path)

    return agents

In [7]:
def scenario(agents,positions_phases):
    """
    Replay a SUMO simulation in the GUI binary, driven by the given agents.

    Args:
        agents: agents handed to ``env.full_simul``.
        positions_phases: phase positions stored on the environment as
            ``env.position_phases`` before the simulation runs.
    """
    gui_command = [SUMO_GUI_BIN, "-c", SIMUL_CONFIG]
    gui_command += ['--start', '--no-warnings', '--scale', '0.3']
    print(WINDOW)
    print(gui_command)

    simulation = EnvironnementSumo(gui_command, WINDOW)
    # The environment needs the phase positions before it is run.
    simulation.position_phases = positions_phases
    simulation.full_simul(agents)

In [None]:

# End-to-end run: size the agents, train them, reload the saved models and
# replay the simulation in the GUI.
type_model = "2DQN"
print(EPISODE,NAME_SIMULATION)
# train_models() reads EPISODE as a global, so this override shortens training
# to a single episode for this run.
EPISODE = 1
inputs_per_agents, outputs_per_agents,positions_phases = preprocess()
print(f'inputs : {inputs_per_agents}')
print(f'outputs : {outputs_per_agents}')
print(f'positions : {positions_phases}')
train_models(inputs_per_agents, outputs_per_agents, positions_phases, type_model)
# Reload from disk so the replay uses exactly what was saved by train_models().
agents = load_trained_agents(inputs_per_agents, outputs_per_agents, type_model)
scenario(agents,positions_phases)


1 simu_opera
 Retrying in 1 seconds
***Starting server on port 56862 ***




Loading net-file from '/Users/arseneclaustre/code/psels/RL_traffic/Traffic/Opera_2/osm.net.xml.gz' ... done (27ms).
Loading additional-files from '/Users/arseneclaustre/code/psels/RL_traffic/Traffic/Opera_2/osm.poly.xml.gz' ... done (4ms).
Loading route-files incrementally from '/Users/arseneclaustre/code/psels/RL_traffic/Traffic/Opera_2/osm.passenger.trips.xml'
Loading done.
Simulation version v1_22_0+0748-755b84baf8f started with time: 0.00.
[Phase(duration=42.0, state='GGGrrrr', minDur=5.0, maxDur=50.0, next='(1,)'), Phase(duration=42.0, state='rrrGGGG', minDur=5.0, maxDur=50.0, next='(3,)')]
[Phase(duration=42.0, state='GGggGGGgrrr', minDur=5.0, maxDur=50.0, next='(1,)'), Phase(duration=42.0, state='rrrrrrrrGGG', minDur=5.0, maxDur=50.0, next='(3,)')]
[Phase(duration=42.0, state='gGGGrrr', minDur=5.0, maxDur=50.0, next='(1,)'), Phase(duration=42.0, state='rrrrGGG', minDur=5.0, maxDur=50.0, next='(3,)')]
[Phase(duration=42.0, state='GGggGGGGgrrrr', minDur=5.0, maxDur=50.0, next='(1,



Loading net-file from '/Users/arseneclaustre/code/psels/RL_traffic/Traffic/Opera_2/osm.net.xml.gz' ... done (35ms).
Loading additional-files from '/Users/arseneclaustre/code/psels/RL_traffic/Traffic/Opera_2/osm.poly.xml.gz' ... done (4ms).
Loading route-files incrementally from '/Users/arseneclaustre/code/psels/RL_traffic/Traffic/Opera_2/osm.passenger.trips.xml'
Loading done.
Simulation version v1_22_0+0748-755b84baf8f started with time: 0.00.
Simulation ended at time: 1500.00.
Reason: TraCI requested termination.
Performance:
 Duration: 1.28s
 TraCI-Duration: 1.09s
 Real time factor: 1176.47
 UPS: 29187.450980
Vehicles:
 Inserted: 234 (Loaded: 861)
 Running: 24
 Waiting: 0
 Teleports: 3 (Yield: 3)
Statistics (avg of 210):
 RouteLength: 581.75
 Speed: 4.22
 Duration: 165.79
 WaitingTime: 74.83
 TimeLoss: 95.04
 DepartDelay: 0.79
DijkstraRouter answered 235 queries and explored 65.10 edges on average.
DijkstraRouter spent 0.00s answering queries (0.00ms on average).
✅ Model saved for Ag

In [None]:
# Show which simulation name is currently active; it is part of every model
# file name used by train_models() and load_trained_agents().
print(NAME_SIMULATION)

simu_opera


In [None]:
# Re-select the model type and recompute the agent sizes for the currently
# configured simulation.
type_model = "2DQN"
print(SIMUL_CONFIG)
#SIMUL_CONFIG = "/Users/arseneclaustre/code/psels/RL_traffic/Traffic/Traditional_traffic/traditional_traffic.sumo.cfg"
# train_models() reads EPISODE as a global; this sets the episode count for any
# later training call.
EPISODE = 50
print(NAME_SIMULATION)
inputs_per_agents, outputs_per_agents,positions_phases = preprocess()

/Users/arseneclaustre/code/psels/RL_traffic/Traffic/double_traffic/double_traffic.sumo.cfg
simu_2_carrefours
 Retrying in 1 seconds




Step #0.00 (0ms ?*RT. ?UPS, TraCI: 4ms, vehicles TOT 0 ACT 0 BUF 0)                      


In [None]:
# Restore the saved agents; requires inputs_per_agents, outputs_per_agents and
# type_model from an earlier cell.
agents = load_trained_agents(inputs_per_agents, outputs_per_agents, type_model)

🔄 Loading pre-trained model for Agent 0 from models/simu_opera_2DQN_Agent0.keras...
🚀 Création d'un nouveau modèle 2DQN...
🔄 Loading pre-trained model for Agent 1 from models/simu_opera_2DQN_Agent1.keras...
🚀 Création d'un nouveau modèle 2DQN...
🔄 Loading pre-trained model for Agent 2 from models/simu_opera_2DQN_Agent2.keras...
🚀 Création d'un nouveau modèle 2DQN...
🔄 Loading pre-trained model for Agent 3 from models/simu_opera_2DQN_Agent3.keras...
🚀 Création d'un nouveau modèle 2DQN...
🔄 Loading pre-trained model for Agent 4 from models/simu_opera_2DQN_Agent4.keras...
🚀 Création d'un nouveau modèle 2DQN...
🔄 Loading pre-trained model for Agent 5 from models/simu_opera_2DQN_Agent5.keras...
🚀 Création d'un nouveau modèle 2DQN...
🔄 Loading pre-trained model for Agent 6 from models/simu_opera_2DQN_Agent6.keras...
🚀 Création d'un nouveau modèle 2DQN...
🔄 Loading pre-trained model for Agent 7 from models/simu_opera_2DQN_Agent7.keras...
🚀 Création d'un nouveau modèle 2DQN...
🔄 Loading pre-tr

In [None]:
# Replay the simulation in the SUMO GUI with the loaded agents.
scenario(agents,positions_phases)

20
 Retrying in 1 seconds


KeyboardInterrupt: 