# Program 1: Self-Driving Car at an Intersection

## Importing Required Libraries

In [2]:
import numpy as np
import random

## Environment Class for Self-Driving Car

In [3]:
class SelfDrivingCarEnv:
    """
    Minimal tabular environment: a car facing a traffic light.

    A state is "<light>_<motion>" (e.g. "Red_Stopped"). An action only changes
    the motion part of the state; the light colour never changes inside
    step(), so only reset() can move the car between green and red states.
    """

    def __init__(self, max_steps=100):
        """
        Initialize the environment for the self-driving car.
        Define states, actions, initial state, reward structure, and discount factor.

        Args:
            max_steps: Maximum number of step() calls per episode before the
                episode is forced to end. Green states can never reach the
                terminating "Red_Stopped" transition, so without a limit an
                episode that starts on green would never finish. Pass None
                to disable the limit.
        """
        # Define the possible states
        self.states = ["Green_Moving", "Green_Stopped", "Red_Moving", "Red_Stopped"]
        # Define the possible actions
        self.actions = ["Stop", "Drive"]
        # Initialize the current state
        self.current_state = "Red_Stopped"
        # Initialize done flag for the episode
        self.done = False
        # Set the discount factor (stored for learners; not used by step())
        self.gamma = 0.9
        # Episode length limit and the number of steps taken so far
        self.max_steps = max_steps
        self.steps_taken = 0
        # Define the reward structure for every (state, action) pair
        self.rewards = {
            ("Green_Moving", "Drive"): 1,         # Reward for driving in a green light
            # This entry was missing, which made step() raise KeyError.
            # -1 mirrors the ("Green_Stopped", "Drive") penalty for changing
            # motion on green.
            ("Green_Moving", "Stop"): -1,         # Penalty for stopping while moving on green
            ("Green_Stopped", "Stop"): 1,         # Reward for stopping at a green light
            ("Green_Stopped", "Drive"): -1,       # Penalty for starting to drive from stopped state
            ("Red_Moving", "Drive"): -10,         # Penalty for driving in a red light
            ("Red_Moving", "Stop"): -5,           # Penalty for stopping while in red light
            ("Red_Stopped", "Stop"): 0,           # Neutral reward for stopping at a red light
            ("Red_Stopped", "Drive"): -10,        # Penalty for driving while stopped at a red light
        }

    # ==========================
    # Reset Function
    # ==========================
    def reset(self):
        """
        Reset the environment to a random initial state.

        Returns:
            The initial state after resetting (one of self.states).
        """
        self.current_state = random.choice(self.states)  # Randomize starting state
        self.done = False  # Reset done flag
        self.steps_taken = 0  # A new episode starts counting from zero
        return self.current_state

    # ==========================
    # Step Function
    # ==========================
    def step(self, action):
        """
        Execute the given action in the current state.

        Args:
            action: "Drive" or "Stop". Any value other than "Drive" is
                treated as "Stop" for the transition and the reward.

        Returns:
            A tuple (next_state, reward, done).

        The episode ends when the action is exactly "Stop" and the resulting
        state is "Red_Stopped", or when max_steps steps have been taken.
        """
        # Any state other than the three named here is handled as "Red_Stopped".
        state = self.current_state
        if state not in ("Green_Moving", "Green_Stopped", "Red_Moving"):
            state = "Red_Stopped"
        effective_action = "Drive" if action == "Drive" else "Stop"

        # The light colour is kept; the action alone decides the motion.
        light = state.split("_")[0]
        motion = "Moving" if effective_action == "Drive" else "Stopped"
        next_state = f"{light}_{motion}"
        reward = self.rewards[(state, effective_action)]

        # Update the current state
        self.current_state = next_state
        self.steps_taken += 1

        if next_state == "Red_Stopped" and action == "Stop":
            self.done = True  # The car is stopped at a red light
        elif self.max_steps is not None and self.steps_taken >= self.max_steps:
            self.done = True  # Step limit reached

        return next_state, reward, self.done

    # ==========================
    # Render Function
    # ==========================
    def render(self):
        """
        Display the current state of the environment.
        """
        print(f"Current State: {self.current_state}")


## Simulation Execution

In [4]:
if __name__ == "__main__":
    # Build the car environment and draw a random starting state.
    env = SelfDrivingCarEnv()
    state = env.reset()
    done = False  # No step has been taken yet

    # Random policy: sample actions until the environment reports the end.
    while True:
        action = random.choice(env.actions)
        next_state, reward, done = env.step(action)
        # Show the state after the step, then a summary of the transition.
        env.render()
        print(f"Action Taken: {action}, Reward: {reward}, Next State: {next_state}")
        if done:
            break

    print("Episode finished.")

Current State: Red_Stopped
Action Taken: Stop, Reward: -5, Next State: Red_Stopped
Episode finished.


# Program 2: Robot Navigation in a Grid World

## Importing Required Libraries

In [5]:
import numpy as np
import random

## Environment Class for Grid World

In [6]:
class GridWorldEnv:
    """
    Square grid world with obstacles and one goal cell.

    Cells are numbered row by row, so state = row * grid_size + col.
    Actions are integers: 0 = UP, 1 = DOWN, 2 = LEFT, anything else = RIGHT.
    """

    def __init__(self):
        """
        Initialize the environment for the grid world.
        Define the grid size, state space, action space, goal state,
        obstacles, discount factor, and a valid initial state.
        """
        self.grid_size = 4  # 4x4 grid
        self.state_space = self.grid_size * self.grid_size  # Total number of states
        self.action_space = 4  # Number of possible actions (UP, DOWN, LEFT, RIGHT)
        self.goal_state = 15  # Goal state located at the bottom-right corner
        self.obstacles = [5, 6, 10, 11]  # Define obstacles in the grid
        self.gamma = 0.95  # Discount factor (stored for learners; not used by step())
        self.done = False  # Flag to indicate if the episode is done
        # Choose the start from the same cells reset() allows, so a fresh
        # environment never begins on an obstacle or on the goal.
        self.current_state = random.choice(self._valid_start_states())

    def _valid_start_states(self):
        """Return every cell that is neither an obstacle nor the goal."""
        return [
            cell
            for cell in range(self.state_space)
            if cell not in self.obstacles and cell != self.goal_state
        ]

    # ==========================
    # Reset Function
    # ==========================
    def reset(self):
        """
        Reset the environment to a random initial state.

        Returns:
            The initial state, which is never an obstacle or the goal.
        """
        self.current_state = random.choice(self._valid_start_states())
        self.done = False  # Reset the done flag
        return self.current_state

    # ==========================
    # Step Function
    # ==========================
    def step(self, action):
        """
        Execute the given action in the current state.

        Args:
            action: 0 = UP, 1 = DOWN, 2 = LEFT; any other value moves RIGHT.

        Returns:
            A tuple (next_state, reward, done). Moving into an obstacle
            costs -10 and leaves the position unchanged. Reaching the goal
            gives +10 and ends the episode. Every other move costs -1,
            including a move that is clamped at the grid border.
        """
        # Convert the current state to row and column in the grid
        row, col = divmod(self.current_state, self.grid_size)
        last_index = self.grid_size - 1

        # Determine the new position, clamped to stay inside the grid
        if action == 0:  # UP
            row = max(row - 1, 0)
        elif action == 1:  # DOWN
            row = min(row + 1, last_index)
        elif action == 2:  # LEFT
            col = max(col - 1, 0)
        else:  # RIGHT
            col = min(col + 1, last_index)

        new_state = row * self.grid_size + col

        if new_state in self.obstacles:
            reward = -10  # Penalty for hitting an obstacle
            next_state = self.current_state  # Stay in the same state
        elif new_state == self.goal_state:
            reward = 10  # Reward for reaching the goal
            self.done = True  # Mark the episode as done
            next_state = new_state
        else:
            reward = -1  # Penalty for a normal move
            next_state = new_state

        self.current_state = next_state
        return next_state, reward, self.done

    # ==========================
    # Render Function
    # ==========================
    def render(self):
        """
        Display the current state of the environment.

        Prints an integer grid: -1 = obstacle, 1 = goal, 2 = current
        position, 0 = free cell. The current position is written last, so
        it overrides the goal marker when the agent stands on the goal.
        """
        grid = np.zeros((self.grid_size, self.grid_size), dtype=int)
        for obstacle in self.obstacles:
            grid[divmod(obstacle, self.grid_size)] = -1  # Mark obstacles
        grid[divmod(self.goal_state, self.grid_size)] = 1  # Mark the goal
        grid[divmod(self.current_state, self.grid_size)] = 2  # Mark the current position
        print(grid)

## Simulation Execution

In [7]:
if __name__ == "__main__":
    # Build the grid world and draw a random starting cell.
    env = GridWorldEnv()
    state = env.reset()
    done = False  # No step has been taken yet

    # Random walk: sample moves until the environment reports the end.
    while True:
        action = random.randint(0, 3)  # One of the four move actions
        next_state, reward, done = env.step(action)
        # Show the grid after the move, then a summary of the transition.
        env.render()
        print(f"Action Taken: {action}, Reward: {reward}, Next State: {next_state}")
        if done:
            break

    print("Episode finished.")

[[ 2  0  0  0]
 [ 0 -1 -1  0]
 [ 0  0 -1 -1]
 [ 0  0  0  1]]
Action Taken: 0, Reward: -1, Next State: 0
[[ 0  0  0  0]
 [ 2 -1 -1  0]
 [ 0  0 -1 -1]
 [ 0  0  0  1]]
Action Taken: 1, Reward: -1, Next State: 4
[[ 2  0  0  0]
 [ 0 -1 -1  0]
 [ 0  0 -1 -1]
 [ 0  0  0  1]]
Action Taken: 0, Reward: -1, Next State: 0
[[ 0  0  0  0]
 [ 2 -1 -1  0]
 [ 0  0 -1 -1]
 [ 0  0  0  1]]
Action Taken: 1, Reward: -1, Next State: 4
[[ 2  0  0  0]
 [ 0 -1 -1  0]
 [ 0  0 -1 -1]
 [ 0  0  0  1]]
Action Taken: 0, Reward: -1, Next State: 0
[[ 0  0  0  0]
 [ 2 -1 -1  0]
 [ 0  0 -1 -1]
 [ 0  0  0  1]]
Action Taken: 1, Reward: -1, Next State: 4
[[ 2  0  0  0]
 [ 0 -1 -1  0]
 [ 0  0 -1 -1]
 [ 0  0  0  1]]
Action Taken: 0, Reward: -1, Next State: 0
[[ 0  2  0  0]
 [ 0 -1 -1  0]
 [ 0  0 -1 -1]
 [ 0  0  0  1]]
Action Taken: 3, Reward: -1, Next State: 1
[[ 0  0  2  0]
 [ 0 -1 -1  0]
 [ 0  0 -1 -1]
 [ 0  0  0  1]]
Action Taken: 3, Reward: -1, Next State: 2
[[ 0  0  2  0]
 [ 0 -1 -1  0]
 [ 0  0 -1 -1]
 [ 0  0  0  1]]
Act