# <div style="text-align:center;">LAB EXERCISE 1</div>




In [None]:
import numpy as np
import random

# State space of the traffic-light MDP; list position is the Q-table row
states = [
    'Far-Red',
    'Near-Red',
    'At-Red',
    'Far-Green',
    'Near-Green',
    'At-Green',
    'Pedestrian',
    'Stopped',
    'Accident',
]

# Action space; list position is the Q-table column
actions = ['Stop', 'Drive']

# Transition model: transitions[state][action] maps every reachable next
# state to its probability. Each inner distribution sums to 1.
transitions = {
    'Far-Red': {
        'Stop': {'Far-Red': 0.8, 'Near-Red': 0.2},
        'Drive': {'Near-Red': 0.9, 'Pedestrian': 0.1},
    },
    'Near-Red': {
        'Stop': {'Near-Red': 0.7, 'At-Red': 0.3},
        'Drive': {'At-Red': 0.8, 'Pedestrian': 0.2},
    },
    'At-Red': {
        'Stop': {'At-Red': 0.9, 'Far-Green': 0.1},
        'Drive': {'Accident': 1.0},
    },
    'Far-Green': {
        'Drive': {'Near-Green': 1.0},
        'Stop': {'Far-Green': 1.0},
    },
    'Near-Green': {
        'Drive': {'At-Green': 1.0},
        'Stop': {'Near-Green': 1.0},
    },
    'At-Green': {
        'Drive': {'Stopped': 1.0},
        'Stop': {'Stopped': 1.0},
    },
    'Pedestrian': {
        'Stop': {'Pedestrian': 0.8, 'Near-Red': 0.2},
        'Drive': {'Accident': 1.0},
    },
    'Stopped': {
        'Stop': {'Stopped': 1.0},
        'Drive': {'Accident': 1.0},
    },
    'Accident': {
        'Stop': {'Accident': 1.0},
        'Drive': {'Accident': 1.0},
    },
}

# Immediate reward: rewards[state][action], independent of the next state
rewards = {
    'Far-Red': {'Stop': 1, 'Drive': -5},
    'Near-Red': {'Stop': 1, 'Drive': -5},
    'At-Red': {'Stop': 1, 'Drive': -20},
    'Far-Green': {'Drive': 10, 'Stop': 0},
    'Near-Green': {'Drive': 10, 'Stop': 0},
    'At-Green': {'Drive': 10, 'Stop': 0},
    'Pedestrian': {'Stop': 0, 'Drive': -20},
    'Stopped': {'Stop': 0, 'Drive': -20},
    'Accident': {'Stop': -20, 'Drive': -20},
}

# Discount factor applied to the value of the next state
gamma = 0.9

# Q-table with one row per state and one column per action, all zeros
Q = np.zeros(shape=(len(states), len(actions)))

# Sample the successor state from the transition model
def get_next_state(current_state, action):
    """Draw a random next state for taking *action* in *current_state*.

    The candidate states and their probabilities are read from
    ``transitions[current_state][action]``; one candidate is sampled with
    those probabilities as weights and returned.
    """
    distribution = transitions[current_state][action]
    candidates = list(distribution)
    weights = list(distribution.values())
    return random.choices(candidates, weights=weights, k=1)[0]

# Function to update Q-values
def q_learning(current_state, action, reward, next_state, alpha=0.1):
    """Apply one tabular Q-learning update to the global ``Q`` table.

    ``Q[s, a]`` is moved a fraction ``alpha`` of the way toward the one-step
    target ``reward + gamma * max(Q[s'])``. Blending with the previous
    estimate (instead of overwriting it) is what lets the entry average over
    the different next states a stochastic transition can produce.

    Args:
        current_state: Name of the state the action was taken in; must be
            an element of ``states``.
        action: Name of the action taken; must be an element of ``actions``.
        reward: Immediate reward received for the state-action pair.
        next_state: Name of the state that was reached; must be an element
            of ``states``.
        alpha: Learning rate. ``alpha=1`` overwrites the entry with the
            sampled target; smaller values average over more samples.

    Raises:
        ValueError: If a state or action name is not in ``states`` /
            ``actions`` (raised by ``list.index``).
    """
    state_index = states.index(current_state)
    action_index = actions.index(action)
    next_state_index = states.index(next_state)

    # One-step bootstrapped estimate of the return from (state, action)
    td_target = reward + gamma * np.max(Q[next_state_index])

    # Temporal-difference update: step toward the target by alpha
    Q[state_index, action_index] += alpha * (td_target - Q[state_index, action_index])

# Training run: 100 episodes of at most 10 randomly chosen actions each
for episode in range(100):
    current_state = 'Far-Red'  # every episode restarts from this state
    for step in range(10):
        # Pure exploration: the action is picked at random, not from Q
        action = random.choice(actions)

        # Reward is looked up for the state being left; the successor is
        # sampled from the transition model
        reward = rewards[current_state][action]
        next_state = get_next_state(current_state, action)

        # Learn from this single transition
        q_learning(current_state, action, reward, next_state)

        # Log the transition
        print(f"Step {step + 1}: State: {current_state}, Action: {action}, Reward: {reward}, Next State: {next_state}")

        # Advance to the sampled successor
        current_state = next_state

        # An accident ends the episode early; anything else keeps going
        if current_state != 'Accident':
            continue
        print("--- Accident occurred! ---")
        break

print("\nFinal Q-table:")
print(Q)


In [1]:
import numpy as np
import gym
from gym import spaces

# Create the custom environment
class SelfDrivingCarEnv(gym.Env):
    """Toy self-driving-car environment with five discrete states.

    The class follows the classic gym calling convention used by the rest of
    this file: ``reset()`` returns only the observation and ``step()``
    returns ``(observation, reward, done, info)``.

    Observations (``self.state``):
        0: far from intersection, red light (start state)
        1: far from intersection, green light
        2: near intersection, red light
        3: near intersection, green light
        4: at the intersection (terminal)

    Actions:
        0: Stop
        1: Drive

    NOTE(review): with the rules below, states 1 and 3 are never entered
    when starting from state 0 (0 leads to 0 or 2, 2 leads to 0 or 4).
    """

    # Human-readable action names; looking an action up here also rejects
    # anything that is not a known action.
    _ACTION_DESC = {0: "Stop", 1: "Drive"}

    # (state, action) -> (reward, message printed for the outcome, next state)
    _RULES = {
        # S1: far from red light
        (0, 0): (1, "Reward: +1 for stopping at Red Light.", 2),
        (0, 1): (-10, "Penalty: -10 for running Red Light!", 0),
        # S2: far from green light
        (1, 1): (1, "Reward: +1 for driving through Green Light.", 3),
        (1, 0): (-1, "Penalty: -1 for stopping unnecessarily at Green Light.", 1),
        # S3: near red light
        (2, 0): (1, "Reward: +1 for stopping at Red Light.", 4),
        (2, 1): (-10, "Penalty: -10 for running Red Light!", 0),
        # S4: near green light
        (3, 1): (1, "Reward: +1 for driving through Green Light.", 4),
        (3, 0): (-1, "Penalty: -1 for stopping unnecessarily at Green Light.", 3),
    }

    def __init__(self):
        """Set up the action/observation spaces and the start state."""
        super().__init__()

        # Actions: 0 = Stop, 1 = Drive
        self.action_space = spaces.Discrete(2)

        # States: S1-S5 (car far/near, red/green light, at intersection)
        self.observation_space = spaces.Discrete(5)

        # Reward produced by the most recent step
        self.reward = 0

        # Start far from the intersection with a red light (S1)
        self.state = 0

        # State descriptions used for console output
        self.state_desc = {
            0: "Far from intersection, Red Light",
            1: "Far from intersection, Green Light",
            2: "Near intersection, Red Light",
            3: "Near intersection, Green Light",
            4: "At the intersection"
        }

    def step(self, action):
        """Apply *action*, print what happened, and advance the state.

        Args:
            action: 0 (Stop) or 1 (Drive).

        Returns:
            Tuple ``(state, reward, done, info)`` where ``done`` is True
            once the state is 4 (at the intersection) and ``info`` is an
            empty dict.

        Raises:
            KeyError: If *action* is not a known action; raised before any
                state is modified.
        """
        print(f"\nAction Taken: {self._ACTION_DESC[action]}")

        rule = self._RULES.get((self.state, action))
        if rule is None:
            # No rule applies (e.g. the episode already ended at the
            # intersection): report a neutral reward rather than repeating
            # the reward of the previous step.
            self.reward = 0
        else:
            self.reward, message, self.state = rule
            print(message)

        # Terminal state if the car reaches the intersection
        done = bool(self.state == 4)

        # Output the current state in a human-readable format
        print(f"Current State: {self.state_desc[self.state]}")

        # Return observation (state), reward, done, info
        return self.state, self.reward, done, {}

    def reset(self):
        """Return to the start state (far, red light) and report it.

        Also clears the stored reward so nothing carries over from the
        previous episode.

        Returns:
            The start state, 0.
        """
        self.state = 0
        self.reward = 0
        print("\n--- Simulation Reset ---")
        print("Starting State: Far from intersection, Red Light")
        return self.state

    def render(self, mode='human'):
        """Do nothing; placeholder for optional visual rendering."""
        pass

# Smoke-test the environment with one short random rollout
env = SelfDrivingCarEnv()
state = env.reset()

print("\n--- Simulation Start ---")

# Take at most 10 steps, stopping early once the episode is done
for step in range(10):
    print(f"\n--- Step {step + 1} ---")
    action = env.action_space.sample()  # random action from the action space
    state, reward, done, _ = env.step(action)

    # Keep stepping until the environment reports the episode is over
    if not done:
        continue
    print("\n--- Car has reached the intersection! Simulation End ---")
    break



--- Simulation Reset ---
Starting State: Far from intersection, Red Light

--- Simulation Start ---

--- Step 1 ---

Action Taken: Stop
Reward: +1 for stopping at Red Light.
Current State: Near intersection, Red Light

--- Step 2 ---

Action Taken: Drive
Penalty: -10 for running Red Light!
Current State: Far from intersection, Red Light

--- Step 3 ---

Action Taken: Drive
Penalty: -10 for running Red Light!
Current State: Far from intersection, Red Light

--- Step 4 ---

Action Taken: Drive
Penalty: -10 for running Red Light!
Current State: Far from intersection, Red Light

--- Step 5 ---

Action Taken: Stop
Reward: +1 for stopping at Red Light.
Current State: Near intersection, Red Light

--- Step 6 ---

Action Taken: Drive
Penalty: -10 for running Red Light!
Current State: Far from intersection, Red Light

--- Step 7 ---

Action Taken: Stop
Reward: +1 for stopping at Red Light.
Current State: Near intersection, Red Light

--- Step 8 ---

Action Taken: Drive
Penalty: -10 for running