<a href="https://colab.research.google.com/github/NINJAHATTORI004/5TH-SEM-TRAINING/blob/main/reinforcement_learning__to_optimize_traffic_signal_timings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import gym
import numpy as np
from gym import spaces

class TrafficSignalEnv(gym.Env):
    """Toy four-way intersection environment for tuning green-light durations.

    An episode walks through a pre-generated table of simulated vehicle
    counts, one column per time step.

    * Observation: vehicle count for each of the 4 directions at the
      current time step.
    * Action: green-light duration (in seconds) granted to each direction.
    * Reward: negative of the number of vehicles that could not pass during
      their green phase in this step.

    The environment follows the legacy Gym API: ``reset()`` returns only the
    observation and ``step()`` returns ``(obs, reward, done, info)``.

    NOTE: with the declared action bounds (at least 10 s of green) and
    2 vehicles/second, at least 20 vehicles pass per step while the simulated
    flow is always below 20, so every in-bounds action yields a reward of 0.
    """

    NUM_DIRECTIONS = 4
    EPISODE_LENGTH = 60      # number of time steps (columns of traffic_flow)
    VEHICLES_PER_SECOND = 2  # assumption: 2 vehicles can pass per second

    def __init__(self):
        super().__init__()

        # State space: traffic flow from 4 directions
        self.observation_space = spaces.Box(
            low=0, high=100, shape=(self.NUM_DIRECTIONS,), dtype=np.int32
        )

        # Action space: green light duration for each direction (in seconds)
        self.action_space = spaces.Box(
            low=10, high=60, shape=(self.NUM_DIRECTIONS,), dtype=np.int32
        )

        # Traffic flow data (simulated); generated as int32 so observations
        # match the dtype declared by observation_space.
        self.traffic_flow = np.random.randint(
            0, 20, size=(self.NUM_DIRECTIONS, self.EPISODE_LENGTH), dtype=np.int32
        )

        self.current_time = 0
        self.waiting_times = np.zeros(self.NUM_DIRECTIONS)

    def reset(self):
        """Rewind to time step 0, clear waiting totals, return the first state.

        The simulated traffic table itself is not regenerated.
        """
        self.current_time = 0
        self.waiting_times = np.zeros(self.NUM_DIRECTIONS)
        return self._get_state()

    def _get_state(self):
        """Return a copy of the per-direction traffic flow at the current time.

        Once the episode has ended, ``current_time`` equals the number of
        columns in the table; the index is clamped so the last column is
        returned as the terminal observation instead of raising IndexError.
        """
        last_step = self.traffic_flow.shape[1] - 1
        column = min(self.current_time, last_step)
        # Copy so callers cannot mutate the underlying traffic table.
        return self.traffic_flow[:, column].copy()

    def step(self, action):
        """Apply one set of green-light durations and advance one time step.

        Args:
            action: sequence of 4 values, the green-light duration for each
                direction.

        Returns:
            Tuple ``(state, reward, done, info)`` where ``state`` is the
            traffic flow at the new time step (the last column once the
            episode is over), ``reward`` is the negative count of vehicles
            left waiting in this step, ``done`` is True after the final time
            step, and ``info["waiting_times"]`` is a snapshot of the
            cumulative waiting counts per direction.

        Raises:
            RuntimeError: if called after the episode has ended without an
                intervening ``reset()``.
        """
        horizon = self.traffic_flow.shape[1]
        if self.current_time >= horizon:
            raise RuntimeError(
                "step() called after the episode ended; call reset() first"
            )

        total_waiting_time = 0

        # Calculate waiting times based on signal timings
        for direction in range(self.NUM_DIRECTIONS):
            green_time = action[direction]
            vehicles = self.traffic_flow[direction, self.current_time]

            # Simple waiting time calculation:
            # If green time is less than needed for vehicles to pass,
            # remaining vehicles need to wait
            vehicles_passed = green_time * self.VEHICLES_PER_SECOND
            waiting_vehicles = max(0, vehicles - vehicles_passed)

            self.waiting_times[direction] += waiting_vehicles
            total_waiting_time += waiting_vehicles

        # Move to next time step
        self.current_time += 1
        done = self.current_time >= horizon

        # Reward is negative of total waiting time
        reward = -total_waiting_time

        # Hand out a copy so earlier info dicts are not altered by later steps.
        info = {"waiting_times": self.waiting_times.copy()}
        return self._get_state(), reward, done, info

    def render(self, mode='human'):
        """Print the current time step, current state and waiting totals."""
        print(f"Current Time: {self.current_time}")
        print(f"Current State (Traffic Flow): {self._get_state()}")
        print(f"Cumulative Waiting Times: {self.waiting_times}")

# Demo: run a single random step through the environment and show the result.
if __name__ == "__main__":
    env = TrafficSignalEnv()

    # Begin an episode and display the first observation.
    initial_state = env.reset()
    print("Initial State:", initial_state)

    # Draw random signal timings from the action space.
    action = env.action_space.sample()
    print("Action (signal timings):", action)

    # Advance the simulation by one step and report each returned item.
    next_state, reward, done, info = env.step(action)
    for label, value in (
        ("Next State:", next_state),
        ("Reward:", reward),
        ("Done:", done),
        ("Info:", info),
    ):
        print(label, value)

    env.render()





Initial State: [ 8  9 12  8]
Action (signal timings): [24 30 19 32]
Next State: [16 13  6  6]
Reward: 0
Done: False
Info: {'waiting_times': array([0., 0., 0., 0.])}
Current Time: 1
Current State (Traffic Flow): [16 13  6  6]
Cumulative Waiting Times: [0. 0. 0. 0.]
