In [92]:
# Enable IPython's autoreload extension so that edits to imported project modules
# are picked up without restarting the kernel (mode 2 = reload all modules).
# NOTE: re-running this cell on a live kernel prints an "already loaded" notice; it is harmless.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [93]:
import os
import traci
import sys
import argparse
import tensorflow as tf
import numpy as np
from dotenv import load_dotenv

# Import internal modules
from rl_package.rl_logic.Environnement import EnvironnementSumo
from rl_package.rl_logic.Agent import AgentSumo
from rl_package.params import *


In [94]:


# Load environment variables (python-dotenv)
load_dotenv()


# Hard-coded Windows path to the SUMO binary, kept for reference only.
# NOTE(review): SUMO_BIN / SUMO_GUI_BIN / EPISODE are not assigned in this notebook;
# presumably they come from `from rl_package.params import *` — confirm.
#SUMO_BIN = r"C:/Program Files/rl_project/Eclipse/Sumo/bin/sumo.exe"
# SUMO configuration file, passed to the SUMO binary through the "-c" option
SIMUL_CONFIG = r"double_traffic/double_traffic.sumo.cfg"


# Second positional argument given to EnvironnementSumo(...); its exact meaning is
# defined in that class — TODO document it here.
WINDOW=2000
# Batch size passed to agent.training_step(); training only starts once the first
# agent's replay buffer holds at least this many entries.
BATCH_SIZE=6
# The SUMO command line itself is assembled inside each function below
# from the SUMO binary path and SIMUL_CONFIG.


In [95]:

def preprocess():
    """
    Determines the number of inputs and outputs required for each agent's model.

    Starts a SUMO simulation only to inspect the network, then shuts it down.

    Returns:
        tuple[list[int], list[int]]: ``(inputs_per_agents, outputs_per_agents)``,
        one entry per id in ``env.trafficlights_ids``.
        Inputs are twice the number of lanes returned by ``env.control_lanes``
        (per the original author's note: queue + vehicle count for each lane).
        Outputs are the number of phases returned by
        ``env.get_phase_without_yellow`` (first element of its result).
    """
    sumoCmd = [SUMO_BIN, "-c", SIMUL_CONFIG, '--start', '--no-warnings']
    env = EnvironnementSumo(sumoCmd, WINDOW)
    inputs_per_agents = []
    outputs_per_agents = []
    try:
        for trafficlight in env.trafficlights_ids:
            # Lanes controlled by this traffic light (per the original note,
            # lanes that are not intersections); two input features per lane.
            n_lanes = len(env.control_lanes(trafficlight))
            inputs_per_agents.append(n_lanes * 2)

            # Number of selectable traffic light phases (yellow phases excluded).
            n_outputs = len(env.get_phase_without_yellow(trafficlight)[0])
            outputs_per_agents.append(n_outputs)
    finally:
        # Always shut the simulation down, even if inspecting the network fails,
        # so no SUMO process / TraCI connection is left open.
        env.close()

    return inputs_per_agents, outputs_per_agents


In [96]:
# Sanity check: display the per-agent input/output sizes.
# Note that this starts (and closes) a SUMO simulation just to read the network.
preprocess()

 Retrying in 1 seconds




Step #0.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 0 ACT 0 BUF 0)                      


([64, 64], [4, 4])

In [97]:

def train_models(inputs_per_agents, outputs_per_agents, type_model="DQN",
                 steps_per_episode=50, epsilon=0.2):
    """
    Trains one reinforcement learning agent per (inputs, outputs) pair to
    control the traffic lights of the SUMO simulation.

    A fresh SUMO environment is started for every episode and is always closed
    at the end of the episode, including when the episode is interrupted
    (e.g. KeyboardInterrupt) or fails.

    Args:
        inputs_per_agents: Number of model inputs for each agent.
        outputs_per_agents: Number of model outputs for each agent.
        type_model: Model type handed to ``AgentSumo``. Any value other than
            ``'DQN'`` also enables the periodic target-network synchronisation.
        steps_per_episode: Maximum number of ``env.step`` calls per episode.
        epsilon: Exploration rate passed to ``epsilon_greedy_policy``; it is
            constant across episodes.

    Returns:
        list: The trained ``AgentSumo`` instances, in the same order as the
        inputs. Nothing is written to disk: loading/saving a checkpoint at
        ``models/{type_model}.keras`` is currently disabled.
    """
    agents = [
        AgentSumo(type_model, inputs, outputs)
        for inputs, outputs in zip(inputs_per_agents, outputs_per_agents)
    ]
    for agent in agents:
        agent.build_model()

    sumoCmd = [SUMO_BIN, "-c", SIMUL_CONFIG, '--start', '--no-warnings']

    for episode in range(EPISODE):
        print(f'🔄 Episode {episode}/{EPISODE}')
        env = EnvironnementSumo(sumoCmd, WINDOW)
        try:
            # A decaying alternative for exploration would be:
            #   epsilon = max(1 - episode / EPISODE, 0.01)

            # Ids of the traffic lights (one agent each).
            traffic_lights = env.trafficlights_ids

            # Initial state of every traffic light.
            states = [
                env.get_states_per_traffic_light(traffic_light)
                for traffic_light in traffic_lights
            ]

            for _ in range(steps_per_episode):
                actions = [
                    agent.epsilon_greedy_policy(np.array(states[i]), epsilon)
                    for i, agent in enumerate(agents)
                ]
                next_states, rewards = env.step(actions)
                for i, agent in enumerate(agents):
                    agent.add_to_memory(
                        np.array(states[i]),
                        np.array(actions[i]),
                        np.array(rewards[i]),
                        np.array(next_states[i]),
                    )
                states = next_states

                # Train only once there is enough experience in memory.
                if len(agents[0].replay_buffer) >= BATCH_SIZE:
                    for agent in agents:
                        agent.training_step(BATCH_SIZE)

                # Stop the episode if there are no vehicles left.
                if env.get_total_number_vehicles() == 0:
                    break
        finally:
            # Release the SUMO process / TraCI connection whatever happened above.
            env.close()

        # Update target network every 5 episodes for Double/Dueling DQN
        if episode % 5 == 0 and type_model != 'DQN':
            for agent in agents:
                agent.model_target.set_weights(agent.model_action.get_weights())

    return agents


In [98]:

def scenario(agent):
    """
    Replays a full simulation in the SUMO GUI, driven by the given agent.

    Args:
        agent: Object forwarded unchanged to ``EnvironnementSumo.full_simul``.
    """
    gui_command = [SUMO_GUI_BIN]
    gui_command += ["-c", SIMUL_CONFIG]
    gui_command += ['--start', '--no-warnings']

    simulation = EnvironnementSumo(gui_command, WINDOW)
    simulation.full_simul(agent)


In [99]:

# Model type handed to AgentSumo; any value other than "DQN" also enables the
# periodic target-network synchronisation inside train_models.
type_model = "2DQN"
# Inspect the network once to size each agent's model (starts and closes SUMO).
inputs_per_agents, outputs_per_agents = preprocess()
# Long-running: runs EPISODE training episodes, each in a fresh SUMO instance.
train_models(inputs_per_agents, outputs_per_agents,type_model)


 Retrying in 1 seconds




Step #0.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 0 ACT 0 BUF 0)                      
🚀 Création d'un nouveau modèle 2DQN...
🚀 Création d'un nouveau modèle 2DQN...
🔄 Episode 0/50
 Retrying in 1 seconds




Step #1000.00 (1ms ~= 10.00*RT, ~141000.00UPS, TraCI: 199ms, vehicles TOT 988 ACT 141 BUF 2 ~= 10.00*RT, ~134000.00UPS, TraCI: 286ms, vehicles TOT 752 ACT 134 BUF 1
🔄 Episode 1/50
 Retrying in 1 seconds




Step #1000.00 (1ms ~= 10.00*RT, ~243000.00UPS, TraCI: 215ms, vehicles TOT 861 ACT 243 BUF 3 ~= 10.00*RT, ~166000.00UPS, TraCI: 236ms, vehicles TOT 732 ACT 166 BUF 1
🔄 Episode 2/50
 Retrying in 1 seconds




Step #1000.00 (0ms ?*RT. ?UPS, TraCI: 257ms, vehicles TOT 982 ACT 169 BUF 209)              ?*RT. ?UPS, TraCI: 228ms, vehicles TOT 784 ACT 160 BUF 80)              
🔄 Episode 3/50
 Retrying in 1 seconds




Step #1000.00 (0ms ?*RT. ?UPS, TraCI: 189ms, vehicles TOT 1002 ACT 103 BUF 189)             ~= 10.00*RT, ~107000.00UPS, TraCI: 271ms, vehicles TOT 752 ACT 107 BUF 1
🔄 Episode 4/50
 Retrying in 1 seconds




Step #1000.00 (0ms ?*RT. ?UPS, TraCI: 193ms, vehicles TOT 981 ACT 178 BUF 210)              ?*RT. ?UPS, TraCI: 245ms, vehicles TOT 768 ACT 97 BUF 96)               
🔄 Episode 5/50
 Retrying in 1 seconds




Interrupt signal received, trying to exit gracefully.440ms, vehicles TOT 421 ACT 100 BUF 1


KeyboardInterrupt: 