In [2]:
import cityflow

In [26]:
import csv

env = cityflow.Engine("config/config_fixed.json", thread_num=1)
travel_times=[] 
vehicle_counts=[]

with open("metrics.csv", "w", newline="") as f: 
    writer = csv.writer(f) 
    writer.writerow(["step", "avg_travel_time", "vehicle_count"]) 
    
    for step in range(1000): 
        env.next_step() 
        avg_time = env.get_average_travel_time() 
        count = len(env.get_vehicles()) 
        # Save metrics 
        writer.writerow([step, avg_time, count]) 
        print(f"Step {step}: avg_time={avg_time:.2f}, vehicles={count}")

Step 0: avg_time=1.00, vehicles=12
Step 1: avg_time=2.00, vehicles=12
Step 2: avg_time=3.00, vehicles=12
Step 3: avg_time=4.00, vehicles=12
Step 4: avg_time=5.00, vehicles=12
Step 5: avg_time=3.50, vehicles=24
Step 6: avg_time=4.50, vehicles=24
Step 7: avg_time=5.50, vehicles=24
Step 8: avg_time=6.50, vehicles=24
Step 9: avg_time=7.50, vehicles=24
Step 10: avg_time=6.00, vehicles=36
Step 11: avg_time=7.00, vehicles=36
Step 12: avg_time=8.00, vehicles=36
Step 13: avg_time=9.00, vehicles=36
Step 14: avg_time=10.00, vehicles=36
Step 15: avg_time=8.50, vehicles=48
Step 16: avg_time=9.50, vehicles=48
Step 17: avg_time=10.50, vehicles=48
Step 18: avg_time=11.50, vehicles=48
Step 19: avg_time=12.50, vehicles=48
Step 20: avg_time=11.00, vehicles=60
Step 21: avg_time=12.00, vehicles=60
Step 22: avg_time=13.00, vehicles=60
Step 23: avg_time=14.00, vehicles=60
Step 24: avg_time=15.00, vehicles=60
Step 25: avg_time=13.50, vehicles=72
Step 26: avg_time=14.50, vehicles=72
Step 27: avg_time=15.50, ve

In [17]:
import cityflow

def evaluate_baseline(config_path="config/config_fixed.json", episodes=5, max_steps=1000):
    results = []
    for ep in range(episodes):
        env = cityflow.Engine(config_path, thread_num=1)
        total_reward, total_queue, steps = 0, 0, 0

        for step in range(max_steps):
            env.next_step()

            # reward = negative of total waiting
            waiting_counts = env.get_lane_waiting_vehicle_count()
            total_waiting = sum(waiting_counts.values())
            reward = -float(total_waiting)

            total_reward += reward
            total_queue += total_waiting
            steps += 1

        avg_time = env.get_average_travel_time()
        avg_queue = total_queue / steps

        ep_result = {
            "episode": ep,
            "reward": total_reward,
            "avg_time": avg_time,
            "avg_queue": avg_queue
        }
        results.append(ep_result)

        # ✅ Print result same style as DQN
        print(f"Baseline Episode {ep}: reward={total_reward:.2f}, avg_time={avg_time:.2f}, avg_queue={avg_queue:.2f}")

    return results

In [27]:
baseline_results = evaluate_baseline("config/config_fixed.json", episodes=10, max_steps=1000)

Baseline Episode 0: reward=-98542.00, avg_time=83.46, avg_queue=98.54
Baseline Episode 1: reward=-98542.00, avg_time=83.46, avg_queue=98.54
Baseline Episode 2: reward=-98542.00, avg_time=83.46, avg_queue=98.54
Baseline Episode 3: reward=-98542.00, avg_time=83.46, avg_queue=98.54
Baseline Episode 4: reward=-98542.00, avg_time=83.46, avg_queue=98.54
Baseline Episode 5: reward=-98542.00, avg_time=83.46, avg_queue=98.54
Baseline Episode 6: reward=-98542.00, avg_time=83.46, avg_queue=98.54
Baseline Episode 7: reward=-98542.00, avg_time=83.46, avg_queue=98.54
Baseline Episode 8: reward=-98542.00, avg_time=83.46, avg_queue=98.54
Baseline Episode 9: reward=-98542.00, avg_time=83.46, avg_queue=98.54


# DQN on the Example Env

In [8]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import cityflow
import matplotlib.pyplot as plt
import matplotlib.patches as patches


class CityFlowEnv(gym.Env):
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 15}

    def __init__(self, config_path="config/config.json", intersection_id="intersection_1_1"):
        super(CityFlowEnv, self).__init__()

        # CityFlow engine
        self.eng = cityflow.Engine(config_path, thread_num=1)
        self.intersection_id = intersection_id

        # Action space = 8 phases (from your roadnet.json)
        self.action_space = spaces.Discrete(8)

        # Observation space = vehicle count per lane
        self.lanes = list(self.eng.get_lane_vehicle_count().keys())
        self.observation_space = spaces.Box(
            low=0, high=100, shape=(len(self.lanes),), dtype=np.float32
        )

        # Rendering setup
        self.fig, self.ax = None, None

        self.step_count = 0
        self.max_steps = 1000

    def reset(self, seed=None, options=None):
        self.eng.reset()
        self.step_count = 0
        return self._get_state(), {}

    def step(self, action):
        # Set traffic light phase
        self.eng.set_tl_phase(self.intersection_id, int(action))

        # Advance the simulation for a few steps
        for _ in range(5):
            self.eng.next_step()
            self.step_count += 1

        obs = self._get_state()

        # Reward = negative of waiting vehicles
        waiting_counts = self.eng.get_lane_waiting_vehicle_count()  # returns dict
        total_waiting = sum(waiting_counts.values())
        reward = -float(total_waiting)

        done = self.step_count >= self.max_steps
        return obs, reward, done, False, {}

    def _get_state(self):
        lane_counts = self.eng.get_lane_vehicle_count()
        state = [lane_counts[lane] for lane in self.lanes]
        return np.array(state, dtype=np.float32)

In [14]:
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv

# Create environment
env = DummyVecEnv([lambda: CityFlowEnv("config/config.json", intersection_id="intersection_1_1")])

model = DQN(
    "MlpPolicy",
    env,
    verbose=1,
    learning_rate=1e-3,
    buffer_size=50000,
    batch_size=64
)

episodes = 100

for ep in range(episodes):
    model.learn(total_timesteps=1000, reset_num_timesteps=False)
    print(f"Finished training episode {ep+1}")

model.save("cityflow_dqn2")

In [5]:
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv

In [6]:
def evaluate_dqn(env, model, episodes=5):
    results = []
    for ep in range(episodes):
        obs = env.reset()   # only obs returned in DummyVecEnv
        total_reward, steps, total_queue = 0, 0, 0
        done = False
        while not done:
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)

            # SB3 VecEnv returns arrays → take first element
            reward = reward[0]
            done = done[0]

            # measure queue
            waiting_counts = env.envs[0].eng.get_lane_waiting_vehicle_count()
            total_waiting = sum(waiting_counts.values())

            total_reward += reward
            total_queue += total_waiting
            steps += 1

        avg_time = env.envs[0].eng.get_average_travel_time()
        avg_queue = total_queue / steps
        ep_result = {
            "episode": ep,
            "reward": total_reward,
            "avg_time": avg_time,
            "avg_queue": avg_queue
        }
        results.append(ep_result)

        # ✅ Print each episode result as it finishes
        print(f"Episode {ep}: reward={total_reward:.2f}, avg_time={avg_time:.2f}, avg_queue={avg_queue:.2f}")

    return results

In [9]:
env = DummyVecEnv([lambda: CityFlowEnv("config/config.json", intersection_id="intersection_1_1")])

In [10]:
model = DQN.load("cityflow_dqn2", env)
dqn_result = evaluate_dqn(env, model, episodes=10)

Episode 0: reward=-15410.00, avg_time=0.00, avg_queue=76.36
Episode 1: reward=-15378.00, avg_time=0.00, avg_queue=76.19
Episode 2: reward=-14729.00, avg_time=0.00, avg_queue=72.89
Episode 3: reward=-15930.00, avg_time=0.00, avg_queue=78.88
Episode 4: reward=-14444.00, avg_time=0.00, avg_queue=71.52
Episode 5: reward=-14101.00, avg_time=0.00, avg_queue=69.80
Episode 6: reward=-14514.00, avg_time=0.00, avg_queue=71.84
Episode 7: reward=-15558.00, avg_time=0.00, avg_queue=77.01
Episode 8: reward=-17616.00, avg_time=0.00, avg_queue=87.18
Episode 9: reward=-13918.00, avg_time=0.00, avg_queue=68.89


In [19]:
model = DQN.load("cityflow_dqn3", env)
dqn_result = evaluate_dqn(env,model,episodes=5)