In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from state import get_state, queue
from basic.reward_basic import calculate_reward
from basic.learner_basic import Learner
from traffic_lights_maps import get_traffic_light_phases, make_map
import os
import sys

import traci
import random
import gym
from gym import spaces

In [3]:
from tensorflow.keras import Sequential, layers
from tensorflow.random import set_seed
import numpy as np
from collections import deque
from tensorflow import keras
from tensorflow import reduce_sum, reduce_mean, one_hot, GradientTape
from tensorflow.keras.losses import MeanSquaredError
import matplotlib.pyplot as plt

In [4]:
set_seed(42)  # extra code – ensures reproducibility on the CPU

input_shape = [36]  # == env.observation_space.shape
n_outputs = 4  # == env.action_space.n
#[((Phase(duration=30.0, state='GGrGrrGGrGrr', minDur=30.0, maxDur=30.0), Phase(duration=20.0, state='grGgrrgrGgrr', minDur=20.0, maxDur=20.0), Phase(duration=30.0, state='GrrGGrGrrGGr', minDur=30.0, maxDur=30.0), Phase(duration=20.0, state='grrgrGgrrgrG', minDur=20.0, maxDur=20.0)), [0, 2, 4, 6])]
model_action = Sequential([
    layers.Dense(32, activation="elu", input_shape=input_shape),
    layers.Dense(32, activation="elu"),
    # layers.Dense(2, activation="elu"),
    layers.Dense(n_outputs)])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
def epsilon_greedy_policy(state, epsilon=0):
    if np.random.rand() < epsilon:
        return np.random.randint(n_outputs)  # random action
    else:
        Q_values = model_action.predict(state[np.newaxis], verbose=0)[0]
        return Q_values.argmax()  # optimal action according to the DQN

In [6]:
def sample_experiences(batch_size):
    indices = np.random.randint(len(replay_buffer), size=batch_size)
    batch = [replay_buffer[index] for index in indices]
    states, actions, rewards, next_states = [
        np.array([experience[field_index] for experience in batch])
        for field_index in range(4)
    ]
    return states, actions, rewards, next_states

In [7]:
batch_size = 32
discount_factor = 0.5
optimizer = keras.optimizers.Nadam(learning_rate=1e-1)
loss_fn = MeanSquaredError()

def training_step(batch_size):
    experiences = sample_experiences(batch_size)
    states, actions, rewards, next_states = experiences  # a changer
    next_Q_values = model_action.predict(next_states, verbose=0)
    max_next_Q_values = next_Q_values.max(axis=1)
    # runs = 1.0 - (dones | truncateds)  # episode is not done or truncated
    target_Q_values = rewards + discount_factor * max_next_Q_values
    target_Q_values = target_Q_values.reshape(-1, 1)
    mask = one_hot(actions, n_outputs)
    with GradientTape() as tape:
        all_Q_values = model_action(states)
        Q_values = reduce_sum(all_Q_values * mask, axis=1, keepdims=True)
        loss = reduce_mean(loss_fn(target_Q_values, Q_values))

    grads = tape.gradient(loss, model_action.trainable_variables)
    optimizer.apply_gradients(zip(grads, model_action.trainable_variables))

In [None]:
import traci

class SimulationSUMO:
    def __init__(self, sumoCmd,frame):
        self.frame=frame
        if traci.isLoaded():
            traci.close()
        traci.start(sumoCmd)  # Start SUMO once
        self.lanes_ids = traci.lane.getIDList()
        self.trafficlights_ids = traci.trafficlight.getIDList()


    def queue(self,lane_ids):
        return [traci.lane.getLastStepHaltingNumber(lane_id) for lane_id in lane_ids]

    def get_lane_no_intersection(self,lane_ids=None):
        if not lane_ids:
            lane_ids=self.lanes_ids
        return [lane_id for lane_id in lane_ids if lane_id.startswith(':')]


    def get_state(self,lane_ids):
        return [traci.lane.getLastStepHaltingNumber(lane_id) for i,lane_id in enumerate(lane_ids) ]+\
        [traci.lane.getLastStepVehicleNumber(lane_id) for i,lane_id in enumerate(lane_ids)]

    def step(self,action):
        ###CODER UN STEP qui prend une action en argument
        #utiliser un modele, renvoyer next state: array, reward:int, done :

        state = np.array(self.queue(self.get_lane()))
        traci.trafficlight.setPhase(self.get_trafficlight_id()[0],2*action)
        for _ in range(self.frame):
            traci.simulationStep()
        next_state = np.array(self.queue(self.get_lane()))
        reward = sum(next_state)-sum(state)
        return next_state,reward



    def close(self):
        traci.close()  # Properly close SUMO
        os.system("pkill -f sumo")

# Example usage
# sumoCmd = [sumoBinary2, "-c", sumoConfig3, "--start"]
# simulation = SimulationSUMO(sumoCmd)

# for _ in range(10000):  # Run 100 steps
#     simulation.step()
#     print(simulation.get_state())  # Print vehicle IDs

# simulation.close()  # Close SUMO properly

In [10]:
sumoBinary = "/Users/arseneclaustre/sumo/bin/sumo-gui"
sumoBinary3 = "/Users/arseneclaustre/sumo/bin/sumo"
sumoBinary2 = "/Library/Frameworks/EclipseSUMO.framework/Versions/1.22.0/EclipseSUMO/bin/sumo-gui"
sumoConfig = r"C:/Program Files/rl_project/2025-02-12-16-24-37/osm.sumocfg"
sumoConfig2 = r"C:/Program Files/rl_project/2025-02-25-19-05-44/osm.sumocfg"
# sumoConfig3 = r"/home/psels/code/psels/RL_traffic/RL_traffic/Traditional_traffic/traditional_traffic.sumo.cfg"
sumoConfig3 = r"Traditional_traffic/traditional_traffic.sumo.cfg"

rewards = []
best_score = 0
reward = 0
total_reward = 0
list_values = []
weights = []
wait_times = []
replay_buffer = deque(maxlen=2000)

sumoCmd = [sumoBinary2, "-c", sumoConfig3, "--start"]
for episode in range(300):
    print(f'episode : {episode}')
    epsilon = max(1 - episode / 150, 0.01)
    simulation=SimulationSUMO(sumoCmd,500)
    lane_ids = simulation.get_lane()
    # print(lane_ids[0])

    #trafic_light_ids = traci.trafficlight.getIDList()
    trafic_light_ids = simulation.get_trafficlight_id()

    state = np.array(simulation.queue(lane_ids))

    action=-1
    wait_times.append(0)
    for _ in range(20): ## TO CHANGED
        action = epsilon_greedy_policy(state, epsilon)
        next_state,reward = simulation.step(action)
        replay_buffer.append((state, action, reward, next_state))
        state = next_state
        if len(replay_buffer) >= batch_size*10:
                training_step(batch_size)

    simulation.close()

episode : 0
 Retrying in 1 seconds
Could not connect to TraCI server at localhost:50854 [Errno 61] Connection refused
 Retrying in 1 seconds
Could not connect to TraCI server at localhost:50854 [Errno 61] Connection refused
 Retrying in 1 seconds
Could not connect to TraCI server at localhost:50854 [Errno 61] Connection refused
 Retrying in 1 seconds
Could not connect to TraCI server at localhost:50854 [Errno 61] Connection refused
 Retrying in 1 seconds
Could not connect to TraCI server at localhost:50854 [Errno 61] Connection refused
 Retrying in 1 seconds
episode : 1
 Retrying in 1 seconds


FatalTraCIError: Connection closed by SUMO.

In [3]:
sumoBinary = "/Users/arseneclaustre/sumo/bin/sumo-gui"
sumoBinary2 = "/Users/arseneclaustre/sumo/bin/sumo"
sumoBinary3 = "/Library/Frameworks/EclipseSUMO.framework/Versions/1.22.0/EclipseSUMO/bin/sumo-gui"
sumoConfig = r"C:/Program Files/rl_project/2025-02-12-16-24-37/osm.sumocfg"
sumoConfig2 = r"C:/Program Files/rl_project/2025-02-25-19-05-44/osm.sumocfg"
# sumoConfig3 = r"/home/psels/code/psels/RL_traffic/RL_traffic/Traditional_traffic/traditional_traffic.sumo.cfg"
sumoConfig3 = r"Traditional_traffic/traditional_traffic.sumo.cfg"


rewards = []
best_score = 0
reward = 0
total_reward = 0
list_values = []
weights = []
wait_times = []
replay_buffer = deque(maxlen=2000)


sumoCmd = [sumoBinary, "-c", sumoConfig3, "--start"]

if traci.isLoaded():
    traci.close()
traci.start(sumoCmd)
lane_ids =  traci.lane.getIDList()
# print(lane_ids[0])

trafic_light_ids = traci.trafficlight.getIDList()

state = np.array(queue(lane_ids))
action=-1
# print(state)
wait_times.append(0)
for step in range(10000): ## TO CHANGED
    #epsilon = max(1 - episode / 10, 0.01)
    state=np.array(queue(lane_ids))
    if step%1000 == 0:
        state=np.array(queue(lane_ids))
        action = epsilon_greedy_policy(state,0)
        print("action", action*2)
        traci.trafficlight.setPhase(trafic_light_ids[0],action*2)
    traci.simulationStep()

traci.close()

NameError: name 'deque' is not defined