In [None]:
import numpy as np

# --- Create Maze ---
def create_maze():
    """Interactively read a maze grid from standard input.

    Prompts for the number of rows and columns, then for each row as
    whitespace-separated cells (0 = empty, 1 = wall). A row that does not
    contain exactly ``cols`` cells, or that contains anything other than
    ``0``/``1``, is rejected and requested again.

    Returns:
        A ``(rows, cols)`` integer NumPy array holding the maze layout.

    Raises:
        ValueError: If a dimension is not an integer or is smaller than 1.
    """
    rows = int(input("Enter the number of rows: "))
    cols = int(input("Enter the number of columns: "))
    if rows < 1 or cols < 1:
        raise ValueError("Maze dimensions must be positive integers.")

    maze = np.zeros((rows, cols), dtype=int)
    print("Enter the maze layout row by row (0=empty, 1=wall):")
    for row in range(rows):
        while True:
            cells = input(f"Row {row+1}: ").split()
            # The length check matters: assigning a single value to a NumPy
            # row would otherwise broadcast it silently across every column.
            if len(cells) == cols and all(cell in ("0", "1") for cell in cells):
                break
            print(f"Invalid row: expected {cols} space-separated values, each 0 or 1.")
        maze[row] = [int(cell) for cell in cells]
    return maze

# Build the environment. Each grid cell is one state, numbered row by row,
# so state 0 is the top-left cell and the last state is the bottom-right cell.
maze = create_maze()
rows, cols = maze.shape
num_states = maze.size
num_actions = 4  # 0 = up, 1 = down, 2 = left, 3 = right
start_state, goal_state = 0, num_states - 1

# --- Q-Learning Agent ---
class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action choice.

    The agent keeps a ``(num_states, num_actions)`` table of action-value
    estimates, all initialised to zero, and refines it one transition at a
    time through :meth:`learn`.
    """

    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.2):
        """Store the hyper-parameters and allocate a zero-filled Q-table."""
        self.q_table = np.zeros((num_states, num_actions))
        self.num_states, self.num_actions = num_states, num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob

    def choose_action(self, state):
        """Return an action index for ``state``.

        With probability ``exploration_prob`` a uniformly random action is
        drawn; otherwise the action with the highest Q-value is returned
        (the lowest index wins ties).
        """
        explore = np.random.rand() < self.exploration_prob
        if explore:
            return np.random.choice(self.num_actions)
        return self.q_table[state].argmax()

    def learn(self, state, action, reward, next_state):
        """Move Q(state, action) towards the one-step bootstrapped target."""
        current = self.q_table[state, action]
        best_next = self.q_table[next_state].max()
        td_target = reward + self.discount_factor * best_next
        self.q_table[state, action] = current + self.learning_rate * (td_target - current)

# Create the agent with the constructor's default learning rate, discount
# factor and exploration probability.
agent = QLearningAgent(num_states, num_actions)

# --- Helper function to check valid moves ---
def is_valid_move(state, next_state):
    """Tell whether ``next_state`` is a cell the agent may occupy.

    A move is rejected when ``next_state`` falls outside ``[0, num_states)``
    or when the corresponding maze cell is not empty (non-zero). ``state``
    is accepted for symmetry with the callers but is not inspected.
    """
    if next_state < 0 or next_state >= num_states:
        return False
    row_idx, col_idx = divmod(next_state, cols)
    return maze[row_idx, col_idx] == 0

# --- Train Agent ---
def _attempted_state(state, action):
    """Return the state index that ``action`` tries to reach from ``state``.

    Up/down shift by one row (``cols`` states) and may leave the grid; the
    caller is expected to bounds-check the result. Left/right moves that
    would wrap around a row edge leave the state unchanged.
    """
    if action == 0:  # Up
        return state - cols
    if action == 1:  # Down
        return state + cols
    if action == 2 and state % cols != 0:  # Left, not on left edge
        return state - 1
    if action == 3 and (state + 1) % cols != 0:  # Right, not on right edge
        return state + 1
    return state


def train_agent(agent, num_episodes=1000, max_steps_per_episode=None):
    """Train ``agent`` on the module-level maze with Q-learning.

    Every episode starts at ``start_state`` and ends when the agent steps
    onto ``goal_state`` or when the per-episode step budget is exhausted.
    Rewards: ``-1`` for a move rejected by ``is_valid_move`` (the agent
    stays put), ``10`` for reaching the goal, ``-0.1`` for any other step.

    Args:
        agent: Object exposing ``choose_action(state)`` and
            ``learn(state, action, reward, next_state)``.
        num_episodes: Number of training episodes to run.
        max_steps_per_episode: Upper bound on steps in a single episode.
            ``None`` (the default) uses ``100 * num_states``. Without a
            bound, a maze whose goal cannot be reached would keep the
            first episode running forever.
    """
    if max_steps_per_episode is None:
        max_steps_per_episode = 100 * num_states

    for _ in range(num_episodes):
        state = start_state
        for _ in range(max_steps_per_episode):
            action = agent.choose_action(state)
            next_state = _attempted_state(state, action)

            done = False
            if not is_valid_move(state, next_state):
                # Blocked by a wall or the grid boundary: penalise and stay.
                reward = -1
                next_state = state
            elif next_state == goal_state:
                reward = 10
                done = True
            else:
                reward = -0.1

            agent.learn(state, action, reward, next_state)
            state = next_state
            if done:
                break

# Train the agent on the maze entered above for 1000 episodes.
train_agent(agent, num_episodes=1000)

# --- Test Agent ---
def test_agent(agent, max_steps=100):
    """Follow the agent's greedy policy from the start state, printing each step.

    At every step the action with the highest Q-value for the current state
    is taken. A move rejected by ``is_valid_move`` leaves the agent where it
    is. The walk stops when ``goal_state`` is reached or after ``max_steps``
    steps, and a final message reports which of the two happened.

    Args:
        agent: Object exposing a ``q_table`` indexable by state.
        max_steps: Maximum number of steps to simulate (default 100).
    """
    state = start_state
    print("\nAgent testing path:\n")
    steps = 0
    while state != goal_state and steps < max_steps:
        action = np.argmax(agent.q_table[state])
        print(f"Step {steps+1}: State {state}, Action {action}")

        next_state = state
        if action == 0:  # Up
            next_state = state - cols
        elif action == 1:  # Down
            next_state = state + cols
        elif action == 2:  # Left
            if state % cols != 0:  # Not on left edge
                next_state = state - 1
        elif action == 3:  # Right
            if (state + 1) % cols != 0:  # Not on right edge
                next_state = state + 1

        if not is_valid_move(state, next_state):
            print("Hit a wall! Staying in same position.")
            next_state = state

        state = next_state
        steps += 1

    # The status symbols are written as escapes so they survive a re-encoding
    # of this file; the previous literals had been corrupted into mojibake.
    if state == goal_state:
        print("\n\U0001F3AF Agent reached the goal successfully!")
    else:
        print("\n\u26A0 Agent failed to reach the goal within step limit.")

# Replay the learned greedy policy from the start state and print the path.
test_agent(agent)


Enter the number of rows:  3
Enter the number of columns:  3


Enter the maze layout row by row (0=empty, 1=wall):


Row 1:  1
Row 2:  1
Row 3:  0
