In [1]:
import numpy as np
import random

In [3]:
# Q-Learning hyperparameters
epsilon = 0.1  # Exploration rate: chance of taking a random action instead of the greedy one
gamma = 0.9  # Discount factor weighting the estimated value of the next state
alpha = 0.1  # Learning rate: step size applied to each Q-value correction

In [5]:
# State space (example traffic conditions) and action space (signal-timing adjustments).
# The position of each name in its list is the index used into the Q-Table.
states = [
    'low_traffic',
    'medium_traffic',
    'high_traffic',
]
actions = [
    'extend_green',
    'reduce_green',
    'switch_light',
]

In [7]:
# Zero-filled Q-Table: one row per state, one column per action (3 x 3 here)
Q_table = np.zeros(shape=(len(states), len(actions)), dtype=float)

In [9]:
# Reward function (example: the worse the waiting time, the more negative the reward)
def get_reward(state):
    """Return the reward associated with a traffic condition.

    Args:
        state: Name of the traffic condition; one of 'low_traffic',
            'medium_traffic' or 'high_traffic'.

    Returns:
        int: -1, -5 or -10 for low, medium and high traffic respectively.

    Raises:
        ValueError: If ``state`` is not a known traffic condition. Without
            this check an unknown state would yield ``None`` and only fail
            later, inside the Q-value arithmetic, with an unrelated error.
    """
    rewards = {
        'low_traffic': -1,  # Minor waiting time
        'medium_traffic': -5,  # Moderate waiting time
        'high_traffic': -10,  # Severe waiting time
    }
    try:
        return rewards[state]
    except (KeyError, TypeError):
        # TypeError covers unhashable inputs (e.g. a list passed by mistake)
        raise ValueError(f"Unknown traffic state: {state!r}") from None

In [11]:
# Epsilon-greedy action selection
def choose_action(state_index):
    """Pick an action index for ``state_index`` with an epsilon-greedy policy.

    A random draw below ``epsilon`` triggers exploration, returning a random
    action index. Otherwise the function exploits, returning the index of the
    largest Q-value in the row of ``Q_table`` for this state (``np.argmax``
    returns the first such index when several entries tie).
    """
    explore = random.uniform(0, 1) < epsilon
    if explore:
        last_action = len(actions) - 1
        return random.randint(0, last_action)

    # Exploitation: greedy with respect to the current Q-value estimates
    q_values = Q_table[state_index]
    return np.argmax(q_values)


In [13]:
# Toy environment dynamics (example: how the traffic state reacts to an action)
def get_next_state(state_index, action):
    """Return the index of the state reached after taking ``action``.

    Args:
        state_index: Index of the current state in ``states``.
        action: Index of the chosen action in ``actions``.

    Returns:
        The next state index: one step lower (not below 0) for
        'extend_green', one step higher (not above the last state) for
        'reduce_green', and a random state index for any other action.
    """
    action_name = actions[action]

    if action_name == 'extend_green':
        # Traffic might decrease
        return max(0, state_index - 1)

    highest_index = len(states) - 1
    if action_name == 'reduce_green':
        # Traffic might increase
        return min(highest_index, state_index + 1)

    # Switching the light may change traffic randomly
    return random.randint(0, highest_index)

In [19]:
# Q-Learning training loop: 1000 episodes of 100 steps each
for episode in range(1000):
    # Each episode starts from a randomly drawn traffic condition
    current_state_index = random.randint(0, len(states) - 1)

    for step in range(100):
        # Select an action and simulate the resulting transition
        action_index = choose_action(current_state_index)
        next_state_index = get_next_state(current_state_index, action_index)

        # The reward is determined by the state the transition lands in
        reward = get_reward(states[next_state_index])

        # Q-Learning update: move the visited entry towards the TD target
        td_target = reward + gamma * np.max(Q_table[next_state_index])
        td_error = td_target - Q_table[current_state_index, action_index]
        Q_table[current_state_index, action_index] += alpha * td_error

        # Continue the episode from the state just reached
        current_state_index = next_state_index

In [21]:
# Show the Q-Table as it stands after training (header line, then the array)
print("Learned Q-Table:", Q_table, sep="\n")

Learned Q-Table:
[[-10.         -14.         -14.57641339]
 [-10.         -22.59995603 -13.38929113]
 [-14.         -22.57607496 -15.66499054]]
