In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from state import get_state, queue
from basic.reward_basic import calculate_reward
from basic.learner_basic import Learner
from traffic_lights_maps import get_traffic_light_phases, make_map
import os
import sys

import traci
import random
import gym
from gym import spaces

In [3]:
from tensorflow.keras import Sequential, layers
from tensorflow.random import set_seed
import numpy as np
from collections import deque
from tensorflow import keras
from tensorflow import reduce_sum, reduce_mean, one_hot, GradientTape
from tensorflow.keras.losses import MeanSquaredError
import matplotlib.pyplot as plt

In [4]:
set_seed(42)  # extra code – ensures reproducibility on the CPU

input_shape = [36]  # == env.observation_space.shape
n_outputs = 4  # == env.action_space.n
#[((Phase(duration=30.0, state='GGrGrrGGrGrr', minDur=30.0, maxDur=30.0), Phase(duration=20.0, state='grGgrrgrGgrr', minDur=20.0, maxDur=20.0), Phase(duration=30.0, state='GrrGGrGrrGGr', minDur=30.0, maxDur=30.0), Phase(duration=20.0, state='grrgrGgrrgrG', minDur=20.0, maxDur=20.0)), [0, 2, 4, 6])]
model_action = Sequential([
    layers.Dense(32, activation="elu", input_shape=input_shape),
    layers.Dense(32, activation="elu"),
    # layers.Dense(2, activation="elu"),
    layers.Dense(n_outputs)])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
def epsilon_greedy_policy(state, epsilon=0):
    if np.random.rand() < epsilon:
        return np.random.randint(n_outputs)  # random action
    else:
        Q_values = model_action.predict(state[np.newaxis], verbose=0)[0]
        print(f"state : {state}")
        print(f"Q_values : {Q_values}")
        return Q_values.argmax()  # optimal action according to the DQN

In [6]:
def sample_experiences(batch_size):
    indices = np.random.randint(len(replay_buffer), size=batch_size)
    batch = [replay_buffer[index] for index in indices]
    states, actions, rewards, next_states = [
        np.array([experience[field_index] for experience in batch])
        for field_index in range(4)
    ]
    return states, actions, rewards, next_states

In [7]:
batch_size = 32
discount_factor = 0.5
optimizer = keras.optimizers.Nadam(learning_rate=1e-1)
loss_fn = MeanSquaredError()

def training_step(batch_size):
    experiences = sample_experiences(batch_size)
    states, actions, rewards, next_states = experiences  # a changer
    next_Q_values = model_action.predict(next_states, verbose=0)
    max_next_Q_values = next_Q_values.max(axis=1)
    # runs = 1.0 - (dones | truncateds)  # episode is not done or truncated
    target_Q_values = rewards + discount_factor * max_next_Q_values
    target_Q_values = target_Q_values.reshape(-1, 1)
    mask = one_hot(actions, n_outputs)
    with GradientTape() as tape:
        all_Q_values = model_action(states)
        Q_values = reduce_sum(all_Q_values * mask, axis=1, keepdims=True)
        loss = reduce_mean(loss_fn(target_Q_values, Q_values))

    grads = tape.gradient(loss, model_action.trainable_variables)
    optimizer.apply_gradients(zip(grads, model_action.trainable_variables))

In [13]:
import traci

class SimulationSUMO:
    def __init__(self, sumoCmd,frame):
        self.frame=frame
        if traci.isLoaded():
            traci.close()
        traci.start(sumoCmd)  # Start SUMO once

    def step(self,action):
        ###CODER UN STEP qui prend une action en argument
        #utiliser un modele, renvoyer next state: array, reward:int, done :

        state = np.array(self.queue(self.get_lane()))
        traci.trafficlight.setPhase(self.get_trafficlight_id()[0],2*action)
        for _ in range(self.frame):
            traci.simulationStep()
        next_state = np.array(self.queue(self.get_lane()))
        reward = sum(next_state)-sum(state)
        return next_state,reward


    def get_state(self):
        return traci.vehicle.getIDList()  # Example: Get list of vehicles

    def get_lane(self):
        return traci.lane.getIDList()

    def get_trafficlight_id(self):
        return traci.trafficlight.getIDList()

    def queue(self,lane_ids):
        return [traci.lane.getLastStepHaltingNumber(lane_id) for lane_id in lane_ids]
    def close(self):
        traci.close()  # Properly close SUMO
        os.system("pkill -f sumo")

# Example usage
# sumoCmd = [sumoBinary2, "-c", sumoConfig3, "--start"]
# simulation = SimulationSUMO(sumoCmd)

# for _ in range(10000):  # Run 100 steps
#     simulation.step()
#     print(simulation.get_state())  # Print vehicle IDs

# simulation.close()  # Close SUMO properly

In [15]:
sumoBinary = "/Users/arseneclaustre/sumo/bin/sumo-gui"
sumoBinary2 = "/Users/arseneclaustre/sumo/bin/sumo"
sumoConfig = r"C:/Program Files/rl_project/2025-02-12-16-24-37/osm.sumocfg"
sumoConfig2 = r"C:/Program Files/rl_project/2025-02-25-19-05-44/osm.sumocfg"
# sumoConfig3 = r"/home/psels/code/psels/RL_traffic/RL_traffic/Traditional_traffic/traditional_traffic.sumo.cfg"
sumoConfig3 = r"Traditional_traffic/traditional_traffic.sumo.cfg"


rewards = []
best_score = 0
reward = 0
total_reward = 0
list_values = []
weights = []
wait_times = []
replay_buffer = deque(maxlen=2000)

sumoCmd = [sumoBinary2, "-c", sumoConfig3, "--start"]
for episode in range(10):
    print(f'episode : {episode}')
    epsilon = max(1 - episode / 300, 0.01)
    simulation=SimulationSUMO(sumoCmd,1000)
    lane_ids = simulation.get_lane()
    # print(lane_ids[0])

    #trafic_light_ids = traci.trafficlight.getIDList()
    trafic_light_ids = simulation.get_trafficlight_id()

    state = np.array(simulation.queue(lane_ids))

    action=-1
    wait_times.append(0)
    for _ in range(200): ## TO CHANGED
        action = epsilon_greedy_policy(state, epsilon)
        next_state,reward = simulation.step(action)
        replay_buffer.append((state, action, reward, next_state))
        state = next_state
        if len(replay_buffer) >= batch_size*10:
                training_step(batch_size)

    simulation.close()

episode : 0
Step #5420.26 (0ms ?*RT. ?UPS, TraCI: 6643ms, vehicles TOT 33 ACT 0 BUF 0)                
 Retrying in 1 seconds




Step #2000.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 33 ACT 0 BUF 0)                   s ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 33 ACT 0 BUF 0)                    raCI: 0ms, vehicles TOT 33 ACT 0 BUF 0)                   
episode : 1
 Retrying in 1 seconds




Step #720.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 33 ACT 0 BUF 0)                    state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ 1.1042756   1.0991768  -0.44582456  0.07099307]
Step #2000.00 (0ms ?*RT. ?UPS, TraCI: 34ms, vehicles TOT 33 ACT 0 BUF 0)                  raCI: 37ms, vehicles TOT 33 ACT 0 BUF 0)                  
episode : 2
 Retrying in 1 seconds




Step #2000.00 (0ms ?*RT. ?UPS, TraCI: 32ms, vehicles TOT 33 ACT 0 BUF 0)                  s ?*RT. ?UPS, TraCI: 37ms, vehicles TOT 33 ACT 0 BUF 0)                   raCI: 32ms, vehicles TOT 33 ACT 0 BUF 0)                  
episode : 3
 Retrying in 1 seconds




state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]         
Q_values : [ 0.1872248   0.11655951  0.6456197  -0.00412214]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.19153982 0.3676957  0.10658354 0.13728786]
Step #763.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 33 ACT 0 BUF 0)                    state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.52629745 0.6726074  0.611826   1.1136694 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.5003823  1.0434748  0.84982026 0.86869025]
Step #2000.00 (0ms ?*RT. ?UPS, TraCI: 33ms, vehicles TOT 33 ACT 0 BUF 0)                  raCI: 0ms, vehicles TOT 33 ACT 0 BUF 0)                   
episode : 4
 Retrying in 1 seconds




Step #2000.00 (0ms ?*RT. ?UPS, TraCI: 37ms, vehicles TOT 33 ACT 0 BUF 0)                  s ?*RT. ?UPS, TraCI: 33ms, vehicles TOT 33 ACT 0 BUF 0)                   raCI: 33ms, vehicles TOT 33 ACT 0 BUF 0)                  
episode : 5
 Retrying in 1 seconds




Step #761.00 (0ms ?*RT. ?UPS, TraCI: 32ms, vehicles TOT 33 ACT 0 BUF 0)                   state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.25030005 0.28351796 0.05710292 0.34657574]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.23352951 0.5268856  0.36909008 0.03245211]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.6639167  0.05908    0.15134025 0.23743439]
Step #2000.00 (0ms ?*RT. ?UPS, TraCI: 32ms, vehicles TOT 33 ACT 0 BUF 0)                  raCI: 0ms, vehicles TOT 33 ACT 0 BUF 0)                   
episode : 6
 Retrying in 1 seconds




state : [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]         
Q_values : [ 0.03887403  0.03334987 -0.13301826 -0.20952034]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ 0.1668998   0.24679863  0.13525534 -0.14861894]
Step #763.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 33 ACT 0 BUF 0)                    state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.23079807 0.33197117 0.18895125 0.18587708]
Step #1483.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 33 ACT 0 BUF 0)                   state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.2968005  0.8046684  0.2980795  0.02068019]
Step #2000.00 (0ms ?*RT. ?UPS, TraCI: 32ms, vehicles TOT 33 ACT 0 BUF 0)                  
episode : 7
 Retrying in 1 seconds




state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.14089233 0.5064281  0.35587335 0.30247855]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.6805077  0.53517306 0.2839365  0.1791122 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.22709507 0.40667343 0.5438967  0.11301422]
Step #720.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 33 ACT 0 BUF 0)                    state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.48165593 1.0415663  0.46678686 0.30060577]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.10141999 0.7311125  0.5914557  0.37568474]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [1.0525842  0.8066931  0.7329807  0.24356127]
Step #2000.00 (0ms ?*RT. ?UPS, TraCI: 31ms, vehicles TOT 33 ACT 0 BUF 0)    



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]         
Q_values : [ 0.12091428  0.11274755  0.43915772 -0.2220571 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.8984258  0.3626789  0.05042386 0.3025787 ]
Step #763.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 33 ACT 0 BUF 0)                    state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.1603896  0.13027453 0.40408874 0.16820478]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.2768628  0.41702127 0.37734985 0.34386015]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-0.05456495 -0.1135025  -0.12543058 -0.11451292]
Step #1483.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 33 ACT 0 BUF 0)                   state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ 0.10276723  0



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]         
Q_values : [ 0.14610118  0.2032063  -0.02717471  0.20451474]
Step #763.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 33 ACT 0 BUF 0)                    state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ 0.05986649  0.06351018  0.02009845 -0.36610126]
Step #1483.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 33 ACT 0 BUF 0)                   state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.49563113 0.18506193 0.12118244 0.17930079]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.7602359  0.07722652 0.03484321 0.08842301]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [0.22786856 0.11628771 0.08361888 0.34512043]
Step #2000.00 (0ms ?*RT. ?UPS, TraCI: 31ms, vehicles TOT 33 ACT 0 BUF 0)                  


In [16]:
sumoBinary = "/Users/arseneclaustre/sumo/bin/sumo-gui"
sumoBinary2 = "/Users/arseneclaustre/sumo/bin/sumo"
sumoConfig = r"C:/Program Files/rl_project/2025-02-12-16-24-37/osm.sumocfg"
sumoConfig2 = r"C:/Program Files/rl_project/2025-02-25-19-05-44/osm.sumocfg"
# sumoConfig3 = r"/home/psels/code/psels/RL_traffic/RL_traffic/Traditional_traffic/traditional_traffic.sumo.cfg"
sumoConfig3 = r"Traditional_traffic/traditional_traffic.sumo.cfg"


rewards = []
best_score = 0
reward = 0
total_reward = 0
list_values = []
weights = []
wait_times = []
replay_buffer = deque(maxlen=2000)


sumoCmd = [sumoBinary, "-c", sumoConfig3, "--start"]

if traci.isLoaded():
    traci.close()
traci.start(sumoCmd)
lane_ids =  traci.lane.getIDList()
# print(lane_ids[0])

trafic_light_ids = traci.trafficlight.getIDList()

state = np.array(queue(lane_ids))
action=-1
# print(state)
wait_times.append(0)
for step in range(10000): ## TO CHANGED
    #epsilon = max(1 - episode / 10, 0.01)
    state=np.array(queue(lane_ids))
    if step%1000 == 0:
        state=np.array(queue(lane_ids))
        action = epsilon_greedy_policy(state,0)
        print("action", action)
        traci.trafficlight.setPhase(trafic_light_ids[0],action)
    traci.simulationStep()

traci.close()

 Retrying in 1 seconds
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ 0.04640955  0.12251711  0.06835556 -0.85784125]
action 1
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ 0.04640955  0.12251711  0.06835556 -0.85784125]
action 1
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ 0.04640955  0.12251711  0.06835556 -0.85784125]
action 1
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ 0.04640931  0.12251568  0.06835747 -0.8578422 ]
action 1
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ 0.04639333  0.12236452  0.06846619 -0.8579109 ]
action 1
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 6 6 0 0 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ 0.04641241  0.12236285  0.06840611 -0.85788894]
action 1
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 6 6 0 0 0 0 4 4 0 0 0 0 