
## Practical No 6

Implement Reinforcement Learning using an example of a maze environment that the 
agent needs to explore.


In [1]:
import numpy as np

def create_maze():
    """Interactively read a maze grid from standard input.

    Prompts for the number of rows and columns, re-prompting until each one
    is a positive integer. It then reads one line per row of space-separated
    cell values, where 0 marks free space and 1 marks a wall. A row with the
    wrong number of values, a non-integer token, or a value other than 0/1 is
    rejected with a message and has to be entered again.

    Returns:
        numpy.ndarray: integer array of shape (rows, cols) containing only
        0s and 1s.
    """
    rows = _read_positive_int("Enter the number of rows: ")
    cols = _read_positive_int("Enter the number of columns: ")
    maze = np.zeros((rows, cols), dtype=int)
    print("Enter the maze layout row by row (0 for free space, 1 for wall):")
    for row in range(rows):
        while True:
            try:
                row_data = [int(token) for token in input().split()]
                if len(row_data) != cols:
                    raise ValueError(f"Expected {cols} columns, but got {len(row_data)}")
                # Anything other than 0/1 has no defined meaning for the maze.
                if any(cell not in (0, 1) for cell in row_data):
                    raise ValueError("Cells must be 0 (free space) or 1 (wall)")
            except ValueError as e:
                print(f"Error: {e}. Please enter {cols} integers separated by spaces.")
            else:
                maze[row] = row_data
                break
    return maze


def _read_positive_int(prompt):
    """Prompt with `prompt` until the user enters an integer >= 1; return it."""
    while True:
        try:
            value = int(input(prompt))
        except ValueError:
            print("Error: please enter a whole number.")
            continue
        if value > 0:
            return value
        # A zero or negative dimension cannot describe a maze.
        print("Error: the value must be at least 1.")

# Build the maze grid interactively from user input.
maze = create_maze()

class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action policy.

    The Q-table is a (num_states, num_actions) float array initialised to
    zero; states and actions are addressed by integer index.
    """

    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.2):
        """Store the hyper-parameters and allocate a zero-filled Q-table."""
        self.num_states, self.num_actions = num_states, num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob
        self.q_table = np.zeros((num_states, num_actions))

    def choose_action(self, state):
        """Return an action index for `state`.

        With probability `exploration_prob` a uniformly random action is
        drawn; otherwise the action with the highest Q-value is returned.
        """
        exploring = np.random.rand() < self.exploration_prob
        if not exploring:
            return np.argmax(self.q_table[state])
        return np.random.choice(self.num_actions)

    def learn(self, state, action, reward, next_state):
        """Apply one Q-learning update for the observed transition."""
        current_estimate = self.q_table[state, action]
        best_future_value = np.max(self.q_table[next_state])
        td_target = reward + self.discount_factor * best_future_value
        # Move the estimate a fraction `learning_rate` towards the target.
        self.q_table[state, action] += self.learning_rate * (td_target - current_estimate)

# Flatten a (row, col) maze position into a single row-major state index
def get_state(row, col, cols):
    """Return the 1D state index of cell (row, col) in a grid `cols` wide."""
    row_offset = row * cols
    return row_offset + col

def get_coordinates(state, cols):
    """Return the (row, col) pair for a 1D state index in a grid `cols` wide."""
    row = state // cols
    col = state % cols
    return (row, col)

# Set up the state/action space and create the agent
_n_rows, _n_cols = maze.shape
num_actions = 4  # 0 = up, 1 = down, 2 = left, 3 = right
num_states = maze.size  # one state per maze cell

# The agent starts in the top-left cell and must reach the bottom-right cell
initial_state = get_state(0, 0, _n_cols)
goal_state = get_state(_n_rows - 1, _n_cols - 1, _n_cols)

agent = QLearningAgent(num_states, num_actions)

def train_agent(agent, num_episodes=1000, max_steps=None):
    """Train `agent` on the module-level `maze` with Q-learning.

    Every episode starts at `initial_state` and ends when the agent enters
    `goal_state` (reward 1). Moves into free cells earn reward 0. A move
    that would leave the grid keeps the agent in place. A move into a wall
    is ignored and produces no Q-table update.

    Args:
        agent: object exposing `choose_action(state)` and
            `learn(state, action, reward, next_state)`.
        num_episodes: number of episodes to run.
        max_steps: optional cap on the number of actions per episode.
            `None` (the default) means an episode only ends at the goal.

    Raises:
        ValueError: if episodes are unbounded (`max_steps` is None) and the
            goal cannot be reached from the start, because training would
            otherwise never terminate.
    """
    n_rows, n_cols = maze.shape
    if (
        max_steps is None
        and num_episodes > 0
        and not _goal_reachable(maze, initial_state, goal_state)
    ):
        raise ValueError("The goal cell cannot be reached from the start cell.")

    # Action index -> (row delta, column delta): up, down, left, right.
    moves = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    for _ in range(num_episodes):
        state = initial_state
        steps = 0
        done = False
        while not done and (max_steps is None or steps < max_steps):
            steps += 1
            action = agent.choose_action(state)
            row, col = get_coordinates(state, n_cols)
            # Unknown action indices leave the agent where it is.
            d_row, d_col = moves.get(int(action), (0, 0))
            next_row, next_col = row + d_row, col + d_col
            if 0 <= next_row < n_rows and 0 <= next_col < n_cols:
                next_state = get_state(next_row, next_col, n_cols)
            else:
                next_state = state  # bumping into the border: stay put

            if maze.flat[next_state] != 0:
                continue  # blocked by a wall: no move and no update

            done = next_state == goal_state
            reward = 1 if done else 0
            agent.learn(state, action, reward, next_state)
            state = next_state


def _goal_reachable(grid, start, goal):
    """Return True if `goal` is a free cell reachable from `start` via free cells."""
    n_rows, n_cols = grid.shape
    if grid[divmod(goal, n_cols)] != 0:
        return False
    seen = {start}
    frontier = [start]
    while frontier:
        row, col = divmod(frontier.pop(), n_cols)
        for d_row, d_col in ((-1, 0), (1, 0), (0, -1), (0, 1)):
            r, c = row + d_row, col + d_col
            if 0 <= r < n_rows and 0 <= c < n_cols and grid[r, c] == 0:
                cell = r * n_cols + c
                if cell not in seen:
                    seen.add(cell)
                    frontier.append(cell)
    return goal in seen

# Train the agent on the maze for 1000 episodes.
train_agent(agent, num_episodes=1000)

def test_agent(agent, max_steps=None):
    """Walk `agent` from `initial_state` towards `goal_state`, printing each step.

    Actions come from `agent.choose_action`, so any exploration the agent
    performs there stays active. Moves follow the maze rules: a move that
    would leave the grid or enter a wall cell leaves the agent where it is,
    so the state always stays a valid free cell of the module-level `maze`.

    Args:
        agent: object exposing `choose_action(state)`.
        max_steps: maximum number of actions to try before giving up.
            `None` (the default) allows 100 steps per maze cell.

    Returns:
        None. Progress is reported with `print`.
    """
    n_rows, n_cols = maze.shape
    if max_steps is None:
        max_steps = 100 * maze.size
    # Action index -> (row delta, column delta): up, down, left, right.
    moves = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    state = initial_state
    steps = 0
    while state != goal_state:
        if steps >= max_steps:
            # Guard against a policy that never gets to the goal.
            print(f"Agent did not reach the goal within {max_steps} steps.")
            return
        steps += 1
        action = agent.choose_action(state)
        row, col = get_coordinates(state, n_cols)
        print(f"Current State: ({row}, {col}), Chosen Action: {action}")
        d_row, d_col = moves.get(int(action), (0, 0))
        new_row, new_col = row + d_row, col + d_col
        in_bounds = 0 <= new_row < n_rows and 0 <= new_col < n_cols
        # Only step into cells that exist and are not walls.
        if in_bounds and maze[new_row, new_col] == 0:
            row, col = new_row, new_col
            state = get_state(row, col, n_cols)
        print(f"New State: ({row}, {col})")
    print("Agent reached the goal!")

# Run the trained agent from the start state and print each step it takes.
test_agent(agent)


Enter the number of rows:  3
Enter the number of columns:  3


Enter the maze layout row by row (0 for free space, 1 for wall):


 0 0 0
 1 1 0
 0 0 0


Current State: (0, 0), Chosen Action: 3
New State: ((0, 1))
Current State: (0, 1), Chosen Action: 3
New State: ((0, 2))
Current State: (0, 2), Chosen Action: 1
New State: ((1, 2))
Current State: (1, 2), Chosen Action: 1
New State: ((2, 2))
Agent reached the goal!
