In [22]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [50]:
import os
import traci
import sys
import argparse
import tensorflow as tf
import numpy as np
from dotenv import load_dotenv

# Import internal modules
from rl_package.rl_logic.Environnement import EnvironnementSumo
from rl_package.rl_logic.Agent import AgentSumo
from rl_package.params import *
# Sanity check: neither name is assigned in this cell, so both presumably come
# from the star import of rl_package.params above — verify there.
print(BATCH_SIZE,NAME_SIMULATION)

32 simu_simple


In [51]:


# Load environment variables from a local .env file (python-dotenv).
load_dotenv()

# SUMO_BIN, SUMO_GUI_BIN and SIMUL_CONFIG are never assigned before this cell,
# so they presumably come from `from rl_package.params import *` — verify there.
# Example overrides kept for reference:
#SUMO_BIN = r"C:/Program Files/rl_project/Eclipse/Sumo/bin/sumo.exe"
# SIMUL_CONFIG = r"double_traffic/double_traffic.sumo.cfg"

# Second positional argument handed to EnvironnementSumo by the functions below.
WINDOW = 2000

# NOTE: deliberately shadows the BATCH_SIZE imported from rl_package.params
# (printed as 32 by the import cell); train_models() reads this global.
BATCH_SIZE = 6

In [52]:
def preprocess():
    """
    Determines the number of inputs and outputs required for each agent.

    Starts a SUMO environment (SUMO_BIN with SIMUL_CONFIG) only to inspect the
    traffic lights, and always closes it again, even if an inspection call fails.

    Returns:
        tuple: ``(inputs_per_agents, outputs_per_agents, positions_phases)``,
        three lists with one entry per id in ``env.trafficlights_ids``:
          - inputs_per_agents: 3 * number of lanes from ``env.control_lanes``
          - outputs_per_agents: number of phases returned by
            ``env.get_phase_without_yellow``
          - positions_phases: the second value returned by
            ``env.get_phase_without_yellow``
    """
    sumoCmd = [SUMO_BIN, "-c", SIMUL_CONFIG, '--start', '--no-warnings']
    env = EnvironnementSumo(sumoCmd, WINDOW)
    inputs_per_agents = []
    outputs_per_agents = []
    positions_phases = []

    try:
        for trafficlight in env.trafficlights_ids:
            # Get the number of lanes controlled by this traffic light
            n_lanes = len(env.control_lanes(trafficlight))
            # Three input features per controlled lane. The previous comment only
            # listed "queue + vehicle count" — TODO confirm what the third one is.
            inputs_per_agents.append(n_lanes * 3)

            # Get the valid traffic light phases (excluding yellow) and their positions
            phases, position = env.get_phase_without_yellow(trafficlight)
            outputs_per_agents.append(len(phases))
            positions_phases.append(position)
    finally:
        # Release the SUMO/TraCI connection even when inspection fails, so the
        # next EnvironnementSumo does not collide with a leaked simulation.
        env.close()

    # List of inputs, outputs per agent, and the position phases of each traffic light
    return inputs_per_agents, outputs_per_agents, positions_phases


In [53]:
def train_models(inputs_per_agents, outputs_per_agents, position_phases, type_model="DQN", steps_per_episode=50):
    """
    Trains multiple reinforcement learning agents to optimize traffic lights.
    Saves each model separately.

    One AgentSumo is created per (inputs, outputs) pair. When a checkpoint file
    already exists for an agent, training resumes from it. Each of the EPISODE
    episodes runs in a fresh EnvironnementSumo that is always closed, even when
    a simulation step raises.

    Args:
        inputs_per_agents: input size of each agent, passed to AgentSumo.
        outputs_per_agents: output size of each agent, passed to AgentSumo.
        position_phases: stored on every environment as ``env.position_phases``.
        type_model: model type passed to AgentSumo. Any value other than "DQN"
            also enables the periodic target-network synchronisation.
        steps_per_episode: maximum number of ``env.step`` calls per episode
            (defaults to 50, the previously hard-coded value).

    Returns:
        None. Writes ``models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras``
        for every agent once all episodes have completed.
    """
    agents = [AgentSumo(type_model, inputs, outputs) for inputs, outputs in zip(inputs_per_agents, outputs_per_agents)]
    # One checkpoint path per agent, shared by the loading and saving steps.
    model_paths = [f"models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras" for i in range(len(agents))]

    # Load pre-trained models if available
    for i, (agent, model_path) in enumerate(zip(agents, model_paths)):
        agent.build_model()
        if os.path.exists(model_path):
            print(f"🔄 Loading pre-trained model for Agent {i} from {model_path}...")
            agent.model_action = tf.keras.models.load_model(model_path)
            if agent.model_target:  # For Double/Dueling DQN
                agent.model_target = tf.keras.models.load_model(model_path)

    sumoCmd = [SUMO_BIN, "-c", SIMUL_CONFIG, '--start', '--no-warnings']

    for episode in range(EPISODE):
        print(f'🔄 Episode {episode}/{EPISODE}')
        env = EnvironnementSumo(sumoCmd, WINDOW)

        try:
            # Store the position phases of the traffic lights in the environment
            env.position_phases = position_phases

            epsilon = max(1 - episode / EPISODE, 0.01)  # Decaying epsilon for exploration

            traffic_lights = env.trafficlights_ids
            states = [env.get_states_per_traffic_light(traffic_light) for traffic_light in traffic_lights]

            for _ in range(steps_per_episode):
                actions = [agent.epsilon_greedy_policy(np.array(states[i]), epsilon) for i, agent in enumerate(agents)]
                next_states, rewards = env.step(actions)

                for i, agent in enumerate(agents):
                    agent.add_to_memory(np.array(states[i]), np.array(actions[i]), np.array(rewards[i]), np.array(next_states[i]))

                states = next_states

                # Each agent is gated on its own buffer, so an empty agent list
                # no longer raises IndexError on agents[0].
                for agent in agents:
                    if len(agent.replay_buffer) >= BATCH_SIZE:
                        agent.training_step(BATCH_SIZE)

                if env.get_total_number_vehicles() == 0:
                    break  # Stop simulation if no vehicles left

            # Update target networks every 5 episodes for Double/Dueling DQN
            if episode % 5 == 0 and type_model != 'DQN':
                for agent in agents:
                    agent.model_target.set_weights(agent.model_action.get_weights())
        finally:
            # Always release the SUMO/TraCI connection; a leaked simulation
            # would get in the way of the next episode or the next notebook cell.
            env.close()

    # NOTE(review): checkpoints are only written here, after the last episode,
    # so an exception during training discards all progress of this call.
    for i, (agent, model_path) in enumerate(zip(agents, model_paths)):
        # Make sure the target directory exists so save() cannot fail on a fresh checkout.
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        agent.model_action.save(model_path)
        print(f"✅ Model saved for Agent {i} at: {model_path}")


In [54]:

def load_trained_agents(inputs_per_agents, outputs_per_agents, type_model="DQN"):
    """
    Rebuilds one agent per (inputs, outputs) pair and restores its saved model.

    Every agent is expected to have a checkpoint at
    ``models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras``. The first missing
    checkpoint is reported and the function stops through ``sys.exit(1)``.

    Returns:
        list: the agents, each with ``model_action`` replaced by the loaded model.
    """
    agents = []
    for n_inputs, n_outputs in zip(inputs_per_agents, outputs_per_agents):
        agents.append(AgentSumo(type_model, n_inputs, n_outputs))

    for i in range(len(agents)):
        model_path = f"models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras"

        # Guard clause: without a checkpoint there is nothing to replay.
        if not os.path.exists(model_path):
            print(f"❌ No pre-trained model found for Agent {i}.")
            sys.exit(1)

        print(f"🔄 Loading pre-trained model for Agent {i} from {model_path}...")
        current = agents[i]
        current.build_model()
        # The freshly built action model is replaced by the one stored on disk.
        current.model_action = tf.keras.models.load_model(model_path)

    return agents

In [55]:
def scenario(agents,positions_phases):
    """
    Runs a SUMO simulation using the trained agents.

    The simulation is launched with SUMO_GUI_BIN and SIMUL_CONFIG, and the
    environment is closed afterwards whether or not the run succeeds.

    Args:
        agents: agents handed to ``env.full_simul``.
        positions_phases: stored on the environment as ``env.position_phases``.

    Returns:
        None.
    """
    sumoCmd = [SUMO_GUI_BIN, "-c", SIMUL_CONFIG, '--start', '--no-warnings']
    env = EnvironnementSumo(sumoCmd, WINDOW)
    # Store the position phases of the traffic lights in the environment
    env.position_phases = positions_phases
    try:
        env.full_simul(agents)
    finally:
        # Best-effort cleanup, mirroring the env.close() done by preprocess() and
        # train_models(). A failing close is reported instead of raised so that
        # it can neither mask an error from full_simul nor break a run that
        # already finished (e.g. if full_simul closed the connection itself —
        # TODO confirm).
        try:
            env.close()
        except Exception as close_error:
            print(f"⚠️ Could not close the SUMO environment cleanly: {close_error}")

In [56]:

type_model = "2DQN"
# The default below is a machine-specific absolute path. Set
# RL_TRAFFIC_SIMUL_CONFIG (for example in the .env file read by load_dotenv())
# to run this notebook on another machine without editing the cell.
SIMUL_CONFIG = os.environ.get(
    "RL_TRAFFIC_SIMUL_CONFIG",
    "/Users/arseneclaustre/code/psels/RL_traffic/Traffic/Traditional_traffic/traditional_traffic.sumo.cfg",
)
WINDOW=2000
EPISODE = 20
# NOTE(review): checkpoint file names are built from NAME_SIMULATION, not from
# SIMUL_CONFIG — confirm that models trained on different networks cannot
# overwrite or be loaded in place of each other.
print(NAME_SIMULATION)
inputs_per_agents, outputs_per_agents,positions_phases = preprocess()
print(f'inputs : {inputs_per_agents}')
print(f'outputs : {outputs_per_agents}')
print(f'positions : {positions_phases}')
# Train, then reload the checkpoints from disk and replay them in a simulation.
train_models(inputs_per_agents, outputs_per_agents, positions_phases, type_model)
agents = load_trained_agents(inputs_per_agents, outputs_per_agents, type_model)
scenario(agents,positions_phases)


simu_simple
 Retrying in 1 seconds




Step #0.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 0 ACT 0 BUF 0)                      
inputs : [36]
outputs : [4]
positions : [[0, 2, 4, 6]]
🚀 Création d'un nouveau modèle 2DQN...
🔄 Episode 0/20
 Retrying in 1 seconds




Step #1000.00 (0ms ?*RT. ?UPS, TraCI: 55ms, vehicles TOT 770 ACT 127 BUF 4)                 ?*RT. ?UPS, TraCI: 57ms, vehicles TOT 559 ACT 147 BUF 5)                
🔄 Episode 1/20
 Retrying in 1 seconds




state [ 0.    0.    0.    0.    1.2   0.    0.    0.    0.    0.    6.99  0.
  0.    0.    0.    0.    2.    0.    0.    0.    0.    0.   15.    0.
  1.    0.   14.    1.    4.    2.    5.    0.    0.    1.   19.    0.  ]
Q_values [-3.0239549 -6.9324307 16.624432  -3.2487533]
state [ 0.    1.14  1.56  0.    0.    0.78  0.    0.    0.    0.    0.    0.
  0.    1.   10.    0.    0.    2.    0.    0.    0.    0.    0.    0.
  0.    2.   19.    2.    2.    4.    4.    0.    0.    1.   11.    0.  ]
Q_values [-3.665717   5.5746155  6.159738  -0.8425819]
Step #720.00 (1ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 558 ACT 64 BUF 0)                  state [ 0.    0.    0.    0.   15.9  20.13  0.    0.    0.    0.    3.67  0.
  0.    0.    0.    0.   17.   29.    0.    0.    0.    0.    4.    0.
  0.    0.    2.    5.   21.   33.    2.    0.    0.    0.    6.    0.  ]
Q_values [  6.6328297 -49.785862    4.0542912 -29.086645 ]
Step #1000.00 (0ms ?*RT. ?UPS, TraCI: 54ms, vehicles TOT 770 ACT 88 BUF 4) 



state [ 0.    0.   65.7   0.    0.21  4.93  0.    0.    9.02  0.    1.63  1.61
  0.    0.   34.    0.    2.    6.    0.    0.    4.    0.    9.    2.
  0.    1.   37.    2.    2.    6.    5.    0.    4.    1.   13.    2.  ]
Q_values [-98.11876   29.008533  76.746704  38.885384]
state [ 0.    0.   79.86  0.    0.    7.33  0.    0.   10.62  0.    0.    2.41
  0.    0.   37.    0.    0.    6.    0.    0.    4.    0.    0.    2.
  0.    0.   40.    2.    2.    8.    4.    0.    4.    0.    4.    2.  ]
Q_values [-84.21031  30.75431  65.25969  40.42184]
state [ 0.    5.14  1.64  0.    3.53  0.    0.    5.36  0.    0.   25.61  0.
  0.    2.   10.    0.    4.    0.    0.    2.    0.    0.   25.    0.
  2.    4.   22.    2.    6.    2.    4.    4.    2.    2.   29.    2.  ]
Q_values [-18.843014  -9.29239   45.570267 -11.15383 ]
Step #1000.00 (0ms ?*RT. ?UPS, TraCI: 55ms, vehicles TOT 770 ACT 92 BUF 4)                  ?*RT. ?UPS, TraCI: 56ms, vehicles TOT 559 ACT 78 BUF 5)                 
🔄 Ep



state [ 0.    0.   15.71  0.    0.3   0.    0.    0.    3.36  0.    0.94  0.
  0.    0.   17.    0.    1.    0.    0.    0.    2.    0.    4.    0.
  0.    0.   20.    3.    3.    1.    4.    0.    2.    0.    8.    0.  ]
Q_values [-23.625214   37.94309     7.157931   -2.3010583]
state [ 0.    0.    0.    0.    1.87  0.63  0.    0.    0.    0.    7.11  0.
  0.    0.    0.    0.    3.    2.    0.    0.    0.    0.   11.    0.
  3.    2.    4.    2.    6.    3.    5.    3.    1.    3.   16.    1.  ]
Q_values [-3.4782338  -5.40655    16.518286   -0.55571973]
state [ 0.    0.   16.05  0.    0.53  2.43  0.    0.    0.    0.    4.94  0.
  0.    0.   17.    0.    2.    4.    0.    0.    0.    0.   11.    0.
  2.    3.   19.    2.    5.    5.    5.    2.    1.    3.   14.    1.  ]
Q_values [-9.090912   6.9339476 -4.325967  -3.1561723]
state [ 0.    0.    1.62  0.    1.19  3.75  0.    0.    0.    0.    8.53  0.64
  0.    0.   10.    0.    2.    4.    0.    0.    0.    0.   24.    1.
  2.    2. 



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-21.158413   16.433756   -3.3221796  -6.4753923]
state [ 0.    1.04  0.    0.    1.02  0.97  0.    1.03  0.    0.    2.09  0.95
  0.    2.    0.    0.    2.    2.    0.    2.    0.    0.    6.    2.
  0.    2.    3.    2.    4.    4.    4.    2.    0.    0.   10.    2.  ]
Q_values [-3.63238   -2.9575748 -5.0874166 -5.6467824]
state [ 0.    0.    9.38  0.    2.12  0.81  0.    0.    0.81  0.   13.05  0.81
  0.    0.   13.    0.    3.    2.    0.    0.    2.    0.   18.    2.
  0.    1.   16.    2.    5.    4.    4.    0.    2.    1.   21.    2.  ]
Q_values [-21.45218    19.88721    -5.1288857  -6.0419993]
state [ 0.    0.    1.44  0.    0.    3.43  0.    0.    0.    0.    0.    2.41
  0.    0.    8.    0.    0.    4.    0.    0.    0.    0.    0.    2.
  0.    0.   12.    2.    2.    6.    4.    0.    0.    0.   16.    2.  ]
Q_values [-12.619247    8.046132   -4.



state [ 0.    0.    4.03  0.    4.27  0.    0.    0.    0.    0.   14.75  0.
  0.    0.    9.    0.    5.    0.    0.    0.    0.    0.   16.    0.
  0.    1.   12.    0.    6.    1.    4.    0.    0.    1.   19.    0.  ]
Q_values [-9.638732  -1.3234553 22.984426  -8.527429 ]
state [ 0.    0.   49.27  0.    0.37  7.85  0.    0.    3.21  0.    1.67  3.21
  0.    0.   29.    0.    2.    7.    0.    0.    2.    0.   10.    2.
  0.    0.   33.    4.    3.    7.    4.    0.    2.    1.   15.    2.  ]
Q_values [-35.190247    47.440598    -3.3252654    0.28978878]
state [ 0.    0.    0.    0.    2.53 13.99  0.    0.    0.    0.   13.78  4.81
  0.    0.    0.    0.    4.    9.    0.    0.    0.    0.   19.    2.
  2.    3.   16.    2.    7.   10.    5.    2.    1.    2.   23.    3.  ]
Q_values [-28.559576   22.630505    9.675978   -6.0106635]
state [ 0.    0.57  1.44  0.    6.49  0.    0.    0.72  0.39  0.   31.21  0.
  0.    2.    8.    0.    6.    0.    0.    2.    1.    0.   25.    0.
  1. 



state [0.   1.84 0.44 0.   0.   2.15 0.   1.83 0.   0.   0.   1.75 0.   2.
 2.   0.   0.   4.   0.   2.   0.   0.   0.   2.   0.   3.   6.   4.
 1.   4.   4.   2.   0.   1.   3.   2.  ]
Q_values [-1.816128   -7.062224    8.373159    0.58265615]
state [ 0.    3.44  5.23  0.    0.29  0.    0.    3.43  0.    0.    0.24  0.
  0.    2.   10.    0.    1.    0.    0.    2.    0.    0.    2.    0.
  0.    2.   13.    2.    4.    1.    4.    2.    0.    0.    6.    0.  ]
Q_values [-5.339738   2.4499688  7.5709276 -4.0426617]
state [ 0.    0.    3.13  0.    0.    3.43  0.    0.    0.    0.    0.    2.41
  0.    0.    7.    0.    0.    4.    0.    0.    0.    0.    0.    2.
  0.    0.   11.    2.    2.    6.    4.    0.    0.    0.    7.    2.  ]
Q_values [-23.954857   13.4284525   2.7417235  -2.307072 ]
state [ 0.    0.   18.1   0.    0.    0.    0.    0.    1.99  0.    0.    0.
  0.    0.   18.    0.    0.    0.    0.    0.    1.    0.    0.    0.
  0.    0.   21.    2.    2.    2.    4.    0. 



state [ 0.    0.    0.05  0.   15.07  8.87  0.    0.    0.05  0.   41.74  0.05
  0.    0.    1.    0.   10.    7.    0.    0.    1.    0.   27.    1.
  0.    0.    6.    0.   10.    8.    4.    0.    2.    0.   30.    2.  ]
Q_values [  2.0921412 -37.37677    75.051765  -10.1280775]
state [ 0.    0.    1.51  0.    0.   12.04  0.    0.    0.81  0.    0.    0.81
  0.    0.    6.    0.    0.    8.    0.    0.    2.    0.    0.    2.
  0.    1.    9.    2.    2.   10.    4.    0.    2.    1.   20.    3.  ]
Q_values [-23.612509    2.8509216  10.989662    2.0786948]
state [0.00e+00 0.00e+00 6.90e+00 0.00e+00 1.00e-02 0.00e+00 0.00e+00 1.00e-02
 1.00e-02 0.00e+00 1.78e+00 0.00e+00 0.00e+00 1.00e+00 1.40e+01 0.00e+00
 1.00e+00 0.00e+00 0.00e+00 1.00e+00 1.00e+00 0.00e+00 1.00e+01 0.00e+00
 0.00e+00 3.00e+00 1.80e+01 0.00e+00 2.00e+00 1.00e+00 4.00e+00 2.00e+00
 2.00e+00 1.00e+00 1.60e+01 1.00e+00]
Q_values [-24.455877    1.0664897  11.899761    2.2566562]
state [ 0.    0.63 13.4   0.    0.    0



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 2. 3. 3. 2. 2. 2. 4. 2. 2. 3. 3. 2.]
Q_values [ -3.417344 -16.5048   -23.560522   4.136096]
state [0.   0.24 0.14 0.   0.23 0.   0.   0.23 0.16 0.   0.28 0.   0.   2.
 2.   0.   2.   0.   0.   2.   2.   0.   3.   0.   1.   2.   7.   2.
 2.   0.   6.   2.   2.   1.   7.   0.  ]
Q_values [ -2.394926   -14.145817    -1.9385996   -0.08565313]
state [ 0.    0.83  0.    0.    2.09 12.04  0.    0.89  0.    0.    5.47  0.81
  0.    2.    0.    0.    3.    8.    0.    2.    0.    0.   10.    2.
  0.    3.    3.    2.    5.   10.    4.    2.    0.    1.   13.    2.  ]
Q_values [-24.539131    -1.4444189   -2.2102349    0.33565527]
state [ 0.    2.44  2.3   0.    5.56  0.    0.    2.49  0.    0.   16.02  0.
  0.    2.    6.    0.    5.    0.    0.    2.    0.    0.   17.    0.
  0.    2.   10.    2.    7.    2.    4.    2.    0.    0.   20.    0.  ]
Q_values [-14.071156   -5.474005   28.64569    -7.4820347]
state [ 0. 



state [ 0.    1.84  5.16  0.    0.4   0.    0.    1.83  1.76  0.    1.81  0.
  0.    2.   10.    0.    2.    0.    0.    2.    2.    0.    6.    0.
  0.    3.   13.    4.    3.    0.    4.    2.    2.    1.    9.    0.  ]
Q_values [-25.140665    4.6770115  -1.2481942  -3.4138854]
state [ 0.    2.64  0.    0.    1.2   0.    0.    2.63  0.    0.    4.76  0.
  0.    2.    0.    0.    2.    0.    0.    2.    0.    0.    9.    0.
  1.    2.    6.    1.    4.    2.    6.    2.    0.    1.   13.    0.  ]
Q_values [ -6.2240086 -10.160498   12.651176   -5.681837 ]
state [0.   3.44 1.22 0.   0.   0.47 0.   3.43 0.   0.   0.   0.   0.   2.
 5.   0.   0.   2.   0.   2.   0.   0.   0.   0.   0.   2.   9.   3.
 2.   3.   4.   2.   0.   0.   4.   0.  ]
Q_values [-3.2127826  -7.721373    0.32491612 -3.6448052 ]
state [ 0.    5.93 13.48  0.    0.    4.47  0.    5.97  0.05  0.    0.    0.05
  0.    3.   14.    0.    0.    5.    0.    3.    1.    0.    0.    1.
  0.    4.   19.    0.    0.    6.    4.   



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-10.728198    3.871289   -2.6158767  -2.72372  ]
state [ 0.    1.04  1.96  0.    1.02  0.    0.    1.03  0.96  0.    2.1   0.
  0.    2.    6.    0.    2.    0.    0.    2.    2.    0.    6.    0.
  0.    2.   10.    2.    4.    2.    4.    2.    2.    0.   10.    0.  ]
Q_values [-26.89838    16.965706   -6.2228904  -2.5524936]
state [ 0.    4.24  5.53  0.    0.    0.    0.    4.23  0.    0.    0.    0.
  0.    2.   10.    0.    0.    0.    0.    2.    0.    0.    0.    0.
  0.    3.   13.    0.    0.    2.    4.    2.    0.    1.    2.    0.  ]
Q_values [-3.6386976 -4.464432  -4.243964  -3.3522353]
state [ 0.    5.04 10.11  0.    0.    0.    0.    5.03  0.    0.    0.3   0.
  0.    2.   13.    0.    0.    0.    0.    2.    0.    0.    2.    0.
  3.    4.   17.    2.    3.    1.    6.    5.    1.    3.    7.    1.  ]
Q_values [-1.9349039 -9.750098  -6.6349354 -



state [ 0.    0.    1.96  0.    1.02  0.    0.    0.    0.96  0.    2.1   0.
  0.    0.    6.    0.    2.    0.    0.    0.    2.    0.    6.    0.
  0.    0.   10.    2.    4.    2.    4.    0.    2.    0.   10.    0.  ]
Q_values [-14.969654     2.7225165    2.205008    -0.38445157]
state [ 0.    0.    0.    0.    2.28  0.33  0.    0.    0.    0.    5.31  0.
  0.    0.    0.    0.    4.    2.    0.    0.    0.    0.   10.    0.
  0.    1.    3.    4.    5.    2.    4.    0.    0.    1.   13.    0.  ]
Q_values [-12.191446  -7.911464  22.604755  -6.48927 ]
state [0.   0.   0.47 0.   0.   1.13 0.   0.   0.   0.   0.   0.   0.   0.
 3.   0.   0.   2.   0.   0.   0.   0.   0.   0.   1.   0.   7.   1.
 1.   4.   5.   0.   0.   1.   5.   0.  ]
Q_values [-22.33968     8.924822   -4.937724    4.0787945]
state [0.   0.   0.   0.   0.29 2.46 0.   0.   0.   0.   0.94 0.   0.   0.
 0.   0.   1.   4.   0.   0.   0.   0.   4.   0.   0.   0.   3.   2.
 4.   5.   4.   0.   0.   0.   8.   0.  ]
Q_value



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-9.096939  -0.8726931 -1.722662   1.671635 ]
state [0.   0.   0.14 0.   0.22 0.17 0.   0.   0.16 0.   0.26 0.15 0.   0.
 2.   0.   2.   2.   0.   0.   2.   0.   3.   2.   1.   0.   7.   2.
 2.   2.   5.   0.   2.   1.   7.   2.  ]
Q_values [-7.285904  -2.6761856 -2.2031767  2.743012 ]
state [ 0.    0.    1.96  0.    1.02  0.    0.    0.    0.96  0.    2.09  0.
  0.    0.    6.    0.    2.    0.    0.    0.    2.    0.    6.    0.
  0.    0.   10.    2.    4.    2.    4.    0.    2.    0.   10.    0.  ]
Q_values [-25.913479    9.930269   -6.5179014   4.3016667]
state [ 0.    0.    0.47  0.    3.88  0.    0.    0.    0.    0.    9.9   0.
  0.    0.    3.    0.    4.    0.    0.    0.    0.    0.   13.    0.
  1.    0.    7.    1.    6.    2.    5.    0.    0.    1.   17.    0.  ]
Q_values [-15.43432    -8.578279   31.49579    -7.6321006]
state [ 0.    0.    2.3  



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-10.480129   -0.6955577  -2.707022    2.4988139]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 2. 3. 3. 2. 2. 2. 4. 2. 2. 3. 3. 2.]
Q_values [ -3.9737551  -4.3965216 -20.071142    1.4606591]
state [0.   0.24 0.14 0.   0.22 0.   0.   0.23 0.16 0.   0.27 0.   0.   2.
 2.   0.   2.   0.   0.   2.   2.   0.   3.   0.   1.   2.   7.   2.
 2.   0.   6.   2.   2.   1.   7.   0.  ]
Q_values [-6.0355935 -3.3485982 -8.035562   1.2147561]
state [ 0.    1.84  5.16  0.    2.28  0.    0.    1.83  1.76  0.    5.31  0.
  0.    2.   10.    0.    4.    0.    0.    2.    2.    0.   10.    0.
  0.    3.   13.    4.    5.    0.    4.    2.    2.    1.   13.    0.  ]
Q_values [-10.787283   -1.9828709   7.649101   -2.3526044]
state [ 0.    2.64  9.76  0.    0.    0.    0.    2.63  2.56  0.    0.    0.
  0.    2.   13.    0.    0.    0.    0.    2.   



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 2. 3. 3. 2. 2. 2. 4. 2. 2. 3. 3. 2.]
Q_values [-9.090546   -0.27921027  5.4593554   2.2067432 ]
state [0.   0.24 0.14 0.   0.   0.17 0.   0.23 0.16 0.   0.   0.15 0.   2.
 2.   0.   0.   2.   0.   2.   2.   0.   0.   2.   1.   2.   7.   2.
 0.   2.   5.   2.   2.   1.   3.   2.  ]
Q_values [-7.564041  -0.7122868 -0.1111933  2.1042373]
state [ 0.    0.    5.16  0.    0.4   0.33  0.    0.    1.76  0.    1.81  0.
  0.    0.   10.    0.    2.    2.    0.    0.    2.    0.    6.    0.
  0.    1.   13.    4.    3.    2.    4.    0.    2.    1.    9.    0.  ]
Q_values [-8.474035   2.1640115  4.941737   1.2953142]
state [ 0.    0.   15.71  0.    0.29  2.46  0.    0.    3.36  0.    0.25  0.
  0.    0.   17.    0.    1.    4.    0.    0.    2.    0.    2.    0.
  0.    0.   20.    2.    4.    5.    4.    0.    2.    0.    6.    0.  ]
Q_values [-28.818577   25.89823    -4.8490286   3.1415737]
state [ 0.    0.79  1.43 



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 2. 3. 3. 2. 2. 2. 4. 2. 2. 3. 3. 2.]
Q_values [-7.558574   -0.6356117   0.4920825  -0.82721555]
state [0.   0.24 0.14 0.   0.   0.17 0.   0.23 0.16 0.   0.   0.15 0.   2.
 2.   0.   0.   2.   0.   2.   2.   0.   0.   2.   1.   2.   7.   2.
 0.   2.   5.   2.   2.   1.   3.   2.  ]
Q_values [-7.5915403  -0.6633628   0.517012   -0.83201075]
state [ 0.    1.04  1.96  0.    0.    0.97  0.    1.03  0.96  0.    0.    0.95
  0.    2.    6.    0.    0.    2.    0.    2.    2.    0.    0.    2.
  0.    2.   10.    2.    2.    4.    4.    2.    2.    0.    3.    2.  ]
Q_values [-14.738251   18.29092    -5.482624   -2.6694233]
state [0.   1.84 0.   0.   0.39 2.15 0.   1.83 0.   0.   0.44 1.75 0.   2.
 0.   0.   2.   4.   0.   2.   0.   0.   2.   2.   0.   3.   3.   4.
 3.   4.   4.   2.   0.   1.   6.   2.  ]
Q_values [-7.658091   -0.68037266  0.57379335 -0.8496906 ]
state [0.   2.64 0.47 0.   0.   3.75 0.   2.63 0.  



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-8.303919    0.08967936 -0.9476974  -2.8369637 ]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 2. 3. 3. 2. 2. 2. 4. 2. 2. 3. 3. 2.]
Q_values [-8.254604   -1.387198    0.13569598 -1.4097173 ]
state [0.   0.24 0.14 0.   0.   0.17 0.   0.23 0.16 0.   0.   0.15 0.   2.
 2.   0.   0.   2.   0.   2.   2.   0.   0.   2.   1.   2.   7.   2.
 0.   2.   5.   2.   2.   1.   3.   2.  ]
Q_values [-8.275797   -1.3844503   0.12280797 -1.4085317 ]
state [ 0.    1.04  1.96  0.    0.    0.97  0.    1.03  0.96  0.    0.    0.95
  0.    2.    6.    0.    0.    2.    0.    2.    2.    0.    0.    2.
  0.    2.   10.    2.    2.    4.    4.    2.    2.    0.    3.    2.  ]
Q_values [-8.300935   -1.3827269   0.11373731 -1.4098217 ]
state [ 0.    0.    9.76  0.    0.    3.75  0.    0.    2.56  0.    0.45  2.55
  0.    0.   13.    0.    0.    4.    0. 



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-9.73066    -1.8000203   0.40211725 -2.8132634 ]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 2. 3. 3. 2. 2. 2. 4. 2. 2. 3. 3. 2.]
Q_values [-9.136079   -1.5484407  -0.41343546 -1.4549747 ]
state [ 0.    1.04  1.96  0.    0.    0.97  0.    1.03  0.96  0.    0.    0.95
  0.    2.    6.    0.    0.    2.    0.    2.    2.    0.    0.    2.
  0.    2.   10.    2.    2.    4.    4.    2.    2.    0.    3.    2.  ]
Q_values [-9.118416  -1.5211229 -0.4794327 -1.4571103]
state [ 0.    1.84  5.16  0.    0.    2.15  0.    1.83  1.76  0.    0.    1.75
  0.    2.   10.    0.    0.    4.    0.    2.    2.    0.    0.    2.
  0.    3.   13.    4.    1.    4.    4.    2.    2.    1.    3.    2.  ]
Q_values [-9.114027   -1.5151635  -0.49116927 -1.4509572 ]
state [ 0.    3.44 15.71  0.    0.29  0.    0.    3.43  3.36  0.    0.24  0.
  0.    2



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-13.287699    -0.17735267  -0.91786593  -2.368509  ]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 2. 3. 3. 2. 2. 2. 4. 2. 2. 3. 3. 2.]
Q_values [-10.337878   -1.2069938  -0.7938676  -1.5369831]
state [0.   0.24 0.14 0.   0.   0.17 0.   0.23 0.16 0.   0.   0.15 0.   2.
 2.   0.   0.   2.   0.   2.   2.   0.   0.   2.   1.   2.   7.   2.
 0.   2.   5.   2.   2.   1.   3.   2.  ]
Q_values [-10.358738   -1.211168   -0.8015498  -1.5525843]
state [ 0.    1.04  1.96  0.    0.    0.97  0.    1.03  0.96  0.    0.    0.95
  0.    2.    6.    0.    0.    2.    0.    2.    2.    0.    0.    2.
  0.    2.   10.    2.    2.    4.    4.    2.    2.    0.    3.    2.  ]
Q_values [-10.383229   -1.2224911  -0.8084696  -1.5608813]
state [ 0.    1.84  5.16  0.    0.    2.15  0.    1.83  1.76  0.    0.    1.75
  0.    2.   10.    0.    0.    4.   



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-16.577765    4.2416553  -3.00405    -2.9338732]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 2. 3. 3. 2. 2. 2. 4. 2. 2. 3. 3. 2.]
Q_values [-10.83073    -1.0658901  -1.2675133  -1.763378 ]
state [0.   0.24 0.   0.   0.22 0.17 0.   0.23 0.   0.   0.26 0.15 0.   2.
 0.   0.   2.   2.   0.   2.   0.   0.   3.   2.   1.   2.   3.   2.
 2.   2.   6.   2.   0.   1.   7.   2.  ]
Q_values [-10.838848   -1.0569036  -1.305321   -1.7692446]
state [ 0.    1.04  0.    0.    1.02  0.97  0.    1.03  0.    0.    2.09  0.95
  0.    2.    0.    0.    2.    2.    0.    2.    0.    0.    6.    2.
  0.    2.    3.    2.    4.    4.    4.    2.    0.    0.   10.    2.  ]
Q_values [-10.846119   -1.0496681  -1.3489481  -1.7745287]
state [ 0.    1.84  0.    0.    2.28  2.15  0.    1.83  0.    0.    5.3   1.75
  0.    2.    0.    0.    4.    4.    0. 

FatalTraCIError: Connection closed by SUMO.

In [None]:
# Show the active simulation name; it is part of every model file path built
# by train_models() and load_trained_agents().
print(NAME_SIMULATION)

simu_simple


In [58]:
# Replays the simulation only. Relies on `agents` and `positions_phases` still
# being in the kernel from the training cell, so it fails on a fresh kernel
# unless that cell ran through load_trained_agents() first.
scenario(agents,positions_phases)

 Retrying in 1 seconds
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-17.03658     6.665996   -4.0581965  -4.565104 ]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 2. 3. 3. 2. 2. 2. 4. 2. 2. 3. 3. 2.]
Q_values [-11.199076   -1.2317432  -1.6289366  -1.6423672]
state [0.   0.24 0.   0.   0.22 0.17 0.   0.23 0.   0.   0.26 0.15 0.   2.
 0.   0.   2.   2.   0.   2.   0.   0.   3.   2.   1.   2.   3.   2.
 2.   2.   6.   2.   0.   1.   7.   2.  ]
Q_values [-11.199076   -1.2317432  -1.6289366  -1.6423672]
state [ 0.    1.04  0.    0.    1.02  0.97  0.    1.03  0.    0.    2.09  0.95
  0.    2.    0.    0.    2.    2.    0.    2.    0.    0.    6.    2.
  0.    2.    3.    2.    4.    4.    4.    2.    0.    0.   10.    2.  ]
Q_values [-11.199076   -1.2317432  -1.6289366  -1.6423672]
state [ 0.    1.84  0.    0.    2.28  2.15  0.    1.83  0.    0.    5.3   1.75
  0.    2.    0.  

FatalTraCIError: Connection closed by SUMO.