In [None]:
from enum import Enum, auto
import random
from collections import deque
from dataclasses import dataclass, field
from typing import Deque, Dict, List, Optional, Set, Tuple

# Percept Definition

In [None]:
class Percept:
    """
    Everything the agent senses after a single environment step.

    Attributes:
        time_step (int): Current time step in the episode.
        bump (bool): True if the agent bumped into a wall on this step.
        breeze (bool): True if the agent is adjacent to a pit.
        stench (bool): True if the agent is adjacent to a live Wumpus.
        scream (bool): True if the agent hears a scream (Wumpus killed).
        glitter (bool): True if there is gold in the agent's current square.
        reward (int): Reward obtained for the last action.
        done (bool): True if the episode has terminated.
    """

    # Declared fields (values are assigned per instance in __init__)
    time_step: int
    bump: bool
    breeze: bool
    stench: bool
    scream: bool
    glitter: bool
    reward: int
    done: bool

    def __init__(self, time_step: int, bump: bool, breeze: bool, stench: bool,
                 scream: bool, glitter: bool, reward: int, done: bool):
        """Store every percept field on the instance."""
        # Bookkeeping values
        self.time_step, self.reward, self.done = time_step, reward, done
        # Sensor flags
        self.bump, self.breeze, self.stench = bump, breeze, stench
        self.scream, self.glitter = scream, glitter

    def __str__(self) -> str:
        """Summarize the percept, listing only the sensor flags that are set."""
        # (label, flag) pairs in the order they appear in the printed summary
        labelled_flags = (
            ("Bump", self.bump),
            ("Breeze", self.breeze),
            ("Stench", self.stench),
            ("Scream", self.scream),
            ("Glitter", self.glitter),
        )
        raised = [label for label, is_on in labelled_flags if is_on]

        # An empty join yields "", which falls back to the "None" placeholder
        signals_str = ", ".join(raised) or "None"
        return (f"Percept(t={self.time_step}, "
                f"Signals=[{signals_str}], Reward={self.reward}, Done={self.done})")


In [None]:
# Sanity check: build one Percept and print its string summary
p = Percept(time_step=1, bump=False, breeze=True, stench=False,
            scream=False, glitter=True, reward=-1, done=False)
print(p)

Percept(t=1, Signals=[Breeze, Glitter], Reward=-1, Done=False)


# Action Definition

In [None]:
class Action(Enum):
    """The six primitive actions an agent can pass to the environment."""

    # Turn left (rotate 90° counter-clockwise), staying in the same cell
    LEFT = 0

    # Turn right (rotate 90° clockwise), staying in the same cell
    RIGHT = 1

    # Move one cell forward in the current orientation
    FORWARD = 2

    # Pick up gold if present in the current square
    GRAB = 3

    # Fire the arrow (only once) in the current facing direction
    SHOOT = 4

    # Climb out of the cave (only valid from the start cell)
    CLIMB = 5

In [None]:
# Each enum member exposes its symbolic name and its integer value
print(Action.GRAB.name)   # 'GRAB'
print(Action.GRAB.value)  # 3

GRAB
3


# Orientation Definition

In [None]:
class Orientation(Enum):
    """
    Compass heading of the agent.

    Member values are numbered in clockwise order (E=0, S=1, W=2, N=3), so a
    quarter turn is simply +1 (clockwise) or -1 (counter-clockwise) modulo 4.
    """
    E = 0
    S = 1
    W = 2
    N = 3

    # NOTE: an earlier, letter-returning `symbol` definition used to precede this
    # one in the class body. Python keeps only the last binding of a name, so it
    # was dead code; it has been removed. The letter is available via `.name`.
    def symbol(self) -> str:
        """
        Return a visual arrow symbol representing this orientation.

        Returns:
            One of "→", "↓", "←", "↑" for E, S, W, N respectively.
            (Use ``self.name`` for the single-letter code instead.)
        """
        symbols = {
            Orientation.E: "→",
            Orientation.S: "↓",
            Orientation.W: "←",
            Orientation.N: "↑",
        }
        return symbols[self]

    def turn_right(self) -> 'Orientation':
        """Return a new orientation turned 90° clockwise."""
        # Clockwise rotation: E → S → W → N → E
        return Orientation((self.value + 1) % 4)

    def turn_left(self) -> 'Orientation':
        """Return a new orientation turned 90° counter-clockwise."""
        # Counterclockwise rotation: E → N → W → S → E
        return Orientation((self.value - 1) % 4)

In [None]:
o = Orientation.E
print(o.symbol())          # → (arrow glyph, not the letter "E")
print(o.turn_right())      # Orientation.S
print(o.turn_left())       # Orientation.N
print(o.turn_left().name)  # N


→
Orientation.S
Orientation.N
N


# Location Definition

In [None]:
class Location:
    """
    A mutable cell position on the Wumpus World grid.

    Coordinates are 1-based:
        - (1,1) is the bottom-left corner (the agent's starting cell).
        - x grows towards the East (right).
        - y grows towards the North (up).
    """
    x: int
    y: int

    def __init__(self, x: int, y: int):
        """Create a location at column `x`, row `y`."""
        self.x, self.y = x, y

    def __str__(self):
        """Render the location as '(x, y)', e.g. '(2, 3)'."""
        return f'({self.x}, {self.y})'

    # ---------------------------------------------------------------------
    # RELATIVE POSITION CHECKS
    # ---------------------------------------------------------------------
    def is_left_of(self, location: 'Location') -> bool:
        """
        True when this cell is the immediate West neighbour of `location`:
        same row, and x exactly one smaller.
        """
        if self.y != location.y:
            return False
        return self.x == location.x - 1

    def is_right_of(self, location: 'Location') -> bool:
        """
        True when this cell is the immediate East neighbour of `location`:
        same row, and x exactly one larger.
        """
        if self.y != location.y:
            return False
        return self.x == location.x + 1

    def is_above(self, location: 'Location') -> bool:
        """
        True when this cell is the immediate North neighbour of `location`:
        same column, and y exactly one larger.
        """
        if self.x != location.x:
            return False
        return self.y == location.y + 1

    def is_below(self, location: 'Location') -> bool:
        """
        True when this cell is the immediate South neighbour of `location`:
        same column, and y exactly one smaller.
        """
        if self.x != location.x:
            return False
        return self.y == location.y - 1

    def neighbours(self, width: int = 4, height: int = 4) -> List['Location']:
        """
        List the cardinally adjacent cells that lie inside a width×height grid.

        Candidates are produced in the fixed order East, West, North, South;
        any candidate outside 1..width × 1..height is dropped.
        """
        adjacent = []
        for dx, dy in ((1, 0), (-1, 0), (0, 1), (0, -1)):  # E, W, N, S
            col, row = self.x + dx, self.y + dy
            if 1 <= col <= width and 1 <= row <= height:
                adjacent.append(Location(col, row))
        return adjacent

    # ---------------------------------------------------------------------
    # LOCATION COMPARISON
    # ---------------------------------------------------------------------
    def is_location(self, location: 'Location') -> bool:
        """
        True when both coordinates match those of `location`.

        Example:
            Location(2, 3).is_location(Location(2, 3)) → True
            Location(2, 3).is_location(Location(3, 3)) → False
        """
        if self.x != location.x:
            return False
        return self.y == location.y

    # ---------------------------------------------------------------------
    # EDGE DETECTION (used for wall/boundary logic)
    # ---------------------------------------------------------------------
    def at_left_edge(self) -> bool:
        """True when the cell sits in the westernmost column (x == 1)."""
        return self.x == 1

    def at_right_edge(self, width: int = 4) -> bool:
        """True when the cell sits in the easternmost column (x == width)."""
        return self.x == width

    def at_top_edge(self, height: int = 4) -> bool:
        """True when the cell sits in the northernmost row (y == height)."""
        return self.y == height

    def at_bottom_edge(self) -> bool:
        """True when the cell sits in the southernmost row (y == 1)."""
        return self.y == 1

    # ---------------------------------------------------------------------
    # MOVE FORWARD OPERATION
    # ---------------------------------------------------------------------
    def forward(self, orientation: 'Orientation',
                width: int = 4, height: int = 4) -> bool:
        """
        Try to advance one cell along `orientation` on a width×height grid.

        Returns:
            True  — the step would leave the grid; coordinates are untouched
                    (this is the "bump" signal).
            False — the step succeeded and this object now holds the new cell.
        """
        # Unit step per heading name; any name other than E/W/N is treated as S.
        step_by_name = {"E": (1, 0), "W": (-1, 0), "N": (0, 1)}
        dx, dy = step_by_name.get(orientation.name, (0, -1))

        target_x, target_y = self.x + dx, self.y + dy
        inside = 1 <= target_x <= width and 1 <= target_y <= height
        if inside:
            # Legal move: commit the new coordinates in place
            self.x, self.y = target_x, target_y
            return False

        # Hit the wall: stay put and report the bump
        return True

    # ---------------------------------------------------------------------
    # SETTER / COPY UTILITIES
    # ---------------------------------------------------------------------
    def set_to(self, location: 'Location'):
        """Copy the coordinates of `location` into this object (in place)."""
        self.x, self.y = location.x, location.y

    # ---------------------------------------------------------------------
    # LINEAR INDEX CONVERSIONS (row-major from bottom row)
    # ---------------------------------------------------------------------
    @staticmethod
    def from_linear(n: int, width: int = 4, height: int = 4) -> 'Location':
        """
        Map a 0-based linear index (0..width*height-1) to 1-based (x, y).

        Row-major order starting from the bottom row:
            0 → (1,1), 1 → (2,1), ..., (width-1) → (width,1),
            width → (1,2), ..., (width*height-1) → (width,height)

        Raises:
            ValueError: if `n` is outside 0..width*height-1.
        """
        if n < 0 or n >= width * height:
            raise ValueError(f"Linear index out of bounds (0..{width*height-1}).")

        # Quotient is the 0-based row, remainder is the 0-based column
        row0, col0 = divmod(n, width)
        return Location(col0 + 1, row0 + 1)

    def to_linear(self, width: int = 4) -> int:
        """
        Map this (x, y) to its 0-based row-major index (inverse of from_linear):
            (1,1) → 0, (2,1) → 1, ..., (width,1) → (width-1),
            (1,2) → width, ..., (width,height) → (width*height-1)
        """
        col0 = self.x - 1
        row0 = self.y - 1
        return row0 * width + col0

    # ---------------------------------------------------------------------
    # RANDOM SAMPLING ON THE GRID
    # ---------------------------------------------------------------------
    @staticmethod
    def random(width: int = 4, height: int = 4) -> 'Location':
        """
        Draw a uniformly random cell of a width×height grid (1-based).

        The column is drawn before the row, both from the global `random`
        module generator.
        """
        import random as _rnd
        col = _rnd.randint(1, width)
        row = _rnd.randint(1, height)
        return Location(col, row)


# Environment Definition

In [None]:
class Environment:
    """
    Simulator for a width×height Wumpus World episode.

    Call `init()` to (re)generate a random world and obtain the first Percept,
    then call `step(action)` repeatedly until the returned Percept has
    `done=True`. None of the instance attributes exist before `init()` is called.
    """
    wumpus_location: Location
    wumpus_alive: bool
    agent_location: Location
    agent_orientation: Orientation
    agent_has_arrow: bool
    agent_has_gold: bool
    game_over: bool
    gold_location: Optional[Location]  # set to None once the gold is grabbed
    pit_locations: List[Location]
    time_step: int
    WIDTH: int
    HEIGHT: int
    allow_climb_without_gold: bool
    pit_prob: float

    # ---------------------------------------------------------------------
    # EPISODE INITIALIZATION
    # ---------------------------------------------------------------------
    def init(self, width: int = 4, height: int = 4,
             pit_prob: float = 0.2, allow_climb_without_gold: bool = True) -> Percept:
        """
        Reset the world and start a new episode.

        World layout (width×height, 1-based):
          - Agent starts at (1,1), facing East, with one arrow and no gold.
          - Place exactly one Wumpus (not at start), alive.
          - Place exactly one Gold (not at start).
          - For each non-start cell, place a Pit with probability `pit_prob`.
            (Overlaps with Wumpus/Gold are allowed; start is always safe.)

        Returns:
            The initial Percept (time_step=0, reward=0, bump/scream=False).

        Raises:
            ValueError: if the grid has no cell other than the start, so the
                Wumpus and the Gold cannot be placed.
        """
        # Fail fast, before touching any state: a grid without a non-start cell
        # would otherwise make the placement loops spin forever.
        self._check_grid(width, height)

        # Store config/state flags
        self.WIDTH = width
        self.HEIGHT = height
        self.pit_prob = pit_prob
        self.allow_climb_without_gold = allow_climb_without_gold

        # Agent state
        self.agent_location = Location(1, 1)
        self.agent_orientation = Orientation.E
        self.agent_has_arrow = True
        self.agent_has_gold = False

        # Episode flags
        self.game_over = False
        self.time_step = 0

        # World objects (order matters for reproducibility under a fixed seed)
        self.make_wumpus()
        self.make_gold()
        self.make_pits(self.pit_prob)

        # Initial percept (no action taken yet → reward=0; bump/scream=False)
        return Percept(
            time_step=self.time_step,
            bump=False,
            breeze=self.is_breeze(),
            stench=self.is_stench(),
            scream=False,
            glitter=self.is_glitter(),
            reward=0,
            done=self.game_over
        )

    # ---------------------------------------------------------------------
    # RANDOM PLACEMENT HELPERS
    # ---------------------------------------------------------------------
    @staticmethod
    def _check_grid(width: int, height: int) -> None:
        """Raise ValueError unless the grid has at least one non-start cell."""
        if width < 1 or height < 1 or width * height < 2:
            raise ValueError(
                "Grid must contain at least one cell besides the start (1,1); "
                f"got width={width}, height={height}."
            )

    def _random_non_start_location(self) -> Location:
        """
        Rejection-sample a uniformly random cell that is not the start (1,1).

        Shared by `make_wumpus` and `make_gold`. The grid is validated first so
        the loop below is guaranteed to terminate.
        """
        self._check_grid(self.WIDTH, self.HEIGHT)
        start = Location(1, 1)
        while True:
            loc = Location.random(self.WIDTH, self.HEIGHT)
            if not loc.is_location(start):
                return loc

    def make_wumpus(self):
        """
        Choose a random location for the Wumpus (not the start) and set alive=True.
        Overlap with pits/gold is allowed.

        Raises:
            ValueError: if the grid has no non-start cell.
        """
        self.wumpus_location = self._random_non_start_location()
        self.wumpus_alive = True

    def make_gold(self):
        """
        Choose a random location for the Gold (not the start).
        Overlap with pits/Wumpus is allowed.

        Raises:
            ValueError: if the grid has no non-start cell.
        """
        self.gold_location = self._random_non_start_location()

    def make_pits(self, pit_prob: float):
        """
        For every non-start cell, independently place a Pit with probability `pit_prob`.
        """
        pits: List[Location] = []
        for n in range(self.WIDTH * self.HEIGHT):
            cell = Location.from_linear(n, self.WIDTH, self.HEIGHT)
            if cell.is_location(Location(1, 1)):
                continue  # start is always safe
            if random.random() < pit_prob:
                pits.append(cell)
        self.pit_locations = pits

    # ---------------------------------------------------------------------
    # LOCATION QUERIES (safe, explicit comparisons w/o relying on __eq__/__hash__)
    # ---------------------------------------------------------------------
    def is_pit_at(self, location: Location) -> bool:
        """Return True if there is a Pit at `location`."""
        return any(p.is_location(location) for p in self.pit_locations)

    def is_pit_adjacent_to_agent(self) -> bool:
        """
        Return True if a Pit is in any cardinally adjacent cell to the agent
        (or same cell—though if same cell, agent is dying/just died).
        """
        here = self.agent_location
        if self.is_pit_at(here):
            return True
        for n in here.neighbours(self.WIDTH, self.HEIGHT):
            if self.is_pit_at(n):
                return True
        return False

    def is_wumpus_adjacent_to_agent(self) -> bool:
        """
        Return True if the (alive) Wumpus is in a cardinal neighbor (or same cell).
        """
        if not self.wumpus_alive:
            return False
        here = self.agent_location
        if self.is_wumpus_at(here):
            return True
        for n in here.neighbours(self.WIDTH, self.HEIGHT):
            if self.is_wumpus_at(n):
                return True
        return False

    def is_agent_at_hazard(self) -> bool:
        """
        Return True if the agent is on a Pit or on the (alive) Wumpus.
        Used immediately after a successful Forward to check death.
        """
        return self.is_pit_at(self.agent_location) or (
            self.is_wumpus_at(self.agent_location) and self.wumpus_alive
        )

    def is_wumpus_at(self, location: Location) -> bool:
        """Return True if the Wumpus is at `location` (alive or dead)."""
        return self.wumpus_location is not None and self.wumpus_location.is_location(location)

    def is_agent_at(self, location: Location) -> bool:
        """Return True if the agent is at `location`."""
        return self.agent_location.is_location(location)

    def is_gold_at(self, location: Location) -> bool:
        """Return True if the Gold is at `location` (i.e., not yet grabbed)."""
        # gold_location becomes None after a successful GRAB
        return self.gold_location is not None and self.gold_location.is_location(location)

    # ---------------------------------------------------------------------
    # PERCEPT QUERIES (Breeze / Stench / Glitter)
    # ---------------------------------------------------------------------
    def is_glitter(self) -> bool:
        """Return True if the agent is in the same cell as the Gold."""
        return self.is_gold_at(self.agent_location)

    def is_breeze(self) -> bool:
        """
        Return True if a Pit is adjacent (or same cell).
        Note: if agent is in a Pit, they will die that step; including 'same cell'
        here makes the percept logic monotone and easy to read.
        """
        return self.is_pit_adjacent_to_agent()

    def is_stench(self) -> bool:
        """
        Return True if the (alive) Wumpus is adjacent (or same cell).
        If the Wumpus is dead, there is no Stench.
        """
        return self.is_wumpus_adjacent_to_agent()

    # ---------------------------------------------------------------------
    # FIRING LINE / COMBAT HELPERS
    # ---------------------------------------------------------------------
    def wumpus_in_line_of_fire(self) -> bool:
        """
        Return True if, from the agent’s current cell and orientation,
        the (alive) Wumpus lies strictly ahead in the same row or column.
        """
        if not (self.wumpus_alive and self.wumpus_location):
            return False

        ax, ay = self.agent_location.x, self.agent_location.y
        wx, wy = self.wumpus_location.x, self.wumpus_location.y

        if self.agent_orientation.name == "E":
            return wy == ay and wx > ax
        if self.agent_orientation.name == "W":
            return wy == ay and wx < ax
        if self.agent_orientation.name == "N":
            return wx == ax and wy > ay
        # SOUTH
        return wx == ax and wy < ay

    def kill_attempt(self) -> bool:
        """
        If the Wumpus is alive and in the line of fire, kill it and return True.
        Otherwise, return False.
        """
        if self.wumpus_alive and self.wumpus_in_line_of_fire():
            self.wumpus_alive = False
            return True
        return False

    # ---------------------------------------------------------------------
    # MAIN TRANSITION FUNCTION
    # ---------------------------------------------------------------------
    def step(self, action: Action) -> Percept:
        """
        Apply an action, update state, and return the resulting Percept.

        Reward components:
            - Base per-step cost:             -1  (always)
            - First SHOOT (arrow available): -10  (consumes arrow)
            - Death (Pit or live Wumpus):  -1000  (terminal)
            - CLIMB at (1,1) with Gold:   +1000  (terminal)
            - CLIMB at (1,1) without Gold:
                * if allow_climb_without_gold=True → terminal with only step cost
                * else ignored (no termination, still pays step cost)

        Raises:
            AssertionError: if called after the episode has already finished.
        """
        assert not self.game_over, "Episode already finished. Call init() for a new one."
        self.time_step += 1

        # Transient signals for this step
        bump = False
        scream = False

        # Base step cost
        reward = -1

        # ---------------------------
        # Dispatch on action
        # ---------------------------
        if action == Action.LEFT:
            self.agent_orientation = self.agent_orientation.turn_left()

        elif action == Action.RIGHT:
            self.agent_orientation = self.agent_orientation.turn_right()

        elif action == Action.FORWARD:
            # Try to move forward; Location.forward returns True if bumped (no move)
            bumped = self.agent_location.forward(self.agent_orientation, self.WIDTH, self.HEIGHT)
            bump = bumped
            if not bumped:
                # After a successful move, check for fatal hazards
                if self.is_agent_at_hazard():
                    reward += -1000
                    self.game_over = True

        elif action == Action.GRAB:
            # Pick up gold if present
            if self.is_glitter():
                self.agent_has_gold = True
                self.gold_location = None

        elif action == Action.SHOOT:
            # Only the first time with an arrow should cost -10 and attempt a kill
            if self.agent_has_arrow:
                self.agent_has_arrow = False
                reward += -10
                if self.kill_attempt():  # sets wumpus_alive=False if hit
                    scream = True
            # If no arrow, no extra penalty/effect

        elif action == Action.CLIMB:
            # Only meaningful at the start cell (1,1)
            if self.agent_location.is_location(Location(1, 1)):
                if self.agent_has_gold:
                    reward += 1000
                    self.game_over = True
                else:
                    if self.allow_climb_without_gold:
                        # End episode with just the step cost already applied
                        self.game_over = True
                    # else: climbing without gold is ignored

        # ---------------------------
        # Build Percept for this step
        # ---------------------------
        percept = Percept(
            time_step=self.time_step,
            bump=bump,
            breeze=self.is_breeze(),
            stench=self.is_stench(),
            scream=scream,
            glitter=self.is_glitter(),
            reward=reward,
            done=self.game_over
        )
        return percept

    # ---------------------------------------------------------------------
    # VISUALIZATION OF THE GAME STATE
    # ---------------------------------------------------------------------
    def visualize(self):
        """
        Print a simple text grid showing the current world state.

        Legend:
            A→ A← A↑ A↓ : Agent and its facing direction
            P           : Pit
            W / w       : Wumpus (alive/dead)
            G           : Gold

        Coordinate system:
            (1,1) is bottom-left; printed from the top row down to the bottom.
        """
        for y in range(self.HEIGHT, 0, -1):  # print rows top→bottom
            line = '|'
            for x in range(1, self.WIDTH + 1):  # columns left→right
                loc = Location(x, y)
                cell_symbols = []  # dynamic list for whatever is in this cell

                # Agent (shows letter A plus its facing arrow)
                if self.is_agent_at(loc):
                    cell_symbols.append('A' + self.agent_orientation.symbol())

                # Pit
                if self.is_pit_at(loc):
                    cell_symbols.append('P')

                # Wumpus (alive/dead)
                if self.is_wumpus_at(loc):
                    cell_symbols.append('W' if self.wumpus_alive else 'w')

                # Gold
                if self.is_gold_at(loc):
                    cell_symbols.append('G')

                # If cell empty, leave a few spaces for alignment
                cell_str = ''.join(cell_symbols) if cell_symbols else '   '

                # Pad to 4 characters; a crowded cell (more than 4 symbols)
                # is printed in full and will widen that column.
                line += f'{cell_str:4}|'
            print(line)


# Agent definition

## Planner Definition

In [None]:
State = Tuple[int, int, int]  # (x, y, d) with x,y 1-based; d in {0..3}

# Unit step per heading, indexed by d: 0=E, 1=S, 2=W, 3=N
DX = [1, 0, -1, 0]  # E, S, W, N
DY = [0, -1, 0, 1]  # E, S, W, N


# Helper functions for turning
def dir_left(d: int) -> int:
    """Turn counter-clockwise (E → N → W → S → E)."""
    return (d - 1) % 4


def dir_right(d: int) -> int:
    """Turn clockwise (E → S → W → N → E)."""
    return (d + 1) % 4


def bfs_shortest_actions(
    start: State,
    goal_cell: Tuple[int, int],
    safe_cells: Set[Tuple[int, int]],
    width: int,
    height: int,
) -> Optional[List[str]]:
    """
    Shortest path (unit cost) in orientation-augmented space.

    Each of "TurnLeft", "TurnRight" and "Forward" costs one step. Forward is
    permitted only if the destination cell is in `safe_cells` and in-bounds,
    so the goal itself must be in `safe_cells` to be reachable.

    Args:
        start: (x, y, d) state the search begins from.
        goal_cell: (x, y) cell to reach; the final heading is irrelevant.
        safe_cells: cells the agent is allowed to step into.
        width, height: grid dimensions (cells are 1..width × 1..height).

    Returns:
        The action names in execution order, [] if `start` is already on the
        goal cell, or None when no route through `safe_cells` exists.
    """
    sx, sy, sd = start
    if (sx, sy) == goal_cell:
        return []

    def in_bounds(x: int, y: int) -> bool:
        return 1 <= x <= width and 1 <= y <= height

    origin: State = (sx, sy, sd)
    parent: Dict[State, Tuple[State, str]] = {}
    seen: Set[State] = {origin}
    q: Deque[State] = deque([origin])

    while q:
        state = q.popleft()
        x, y, d = state

        # Successors in a fixed order (TurnLeft, TurnRight, Forward) so that
        # ties between equally short plans are always broken the same way.
        successors: List[Tuple[State, str]] = [
            ((x, y, dir_left(d)), "TurnLeft"),
            ((x, y, dir_right(d)), "TurnRight"),
        ]
        fx, fy = x + DX[d], y + DY[d]
        if in_bounds(fx, fy) and (fx, fy) in safe_cells:
            successors.append(((fx, fy, d), "Forward"))

        for nxt, action in successors:
            if nxt in seen:
                continue
            seen.add(nxt)
            parent[nxt] = (state, action)
            # Only a Forward move can land on the goal: no state on the goal
            # cell is ever enqueued, so turns never start from it.
            if (nxt[0], nxt[1]) == goal_cell:
                return _reconstruct_actions(parent, nxt)
            q.append(nxt)

    return None  # no route through known-safe cells


def _reconstruct_actions(
    parent: Dict[State, Tuple[State, str]],
    goal: State
) -> List[str]:
    """
    Walk the `parent` links back from `goal` and return the actions in forward order.

    The walk stops at the first state without a parent entry (the search origin).
    """
    actions: List[str] = []
    cur: Optional[State] = goal
    while cur in parent:
        prev, a = parent[cur]
        actions.append(a)
        cur = prev
    actions.reverse()
    return actions

## NaiveAgent Definition

In [None]:
class NaiveAgent:
    """
    A naive agent that selects random actions and interacts with the Environment.
    It uses the updated Environment interface supporting dynamic grid sizes.
    """

    def __init__(self, width: int = 4, height: int = 4,
                 pit_prob: float = 0.2, allow_climb_without_gold: bool = True,
                 seed: Optional[int] = None):
        """
        Initialize the NaiveAgent with optional environment parameters.

        Parameters
        ----------
        width : int
            Width of the grid (default: 4)
        height : int
            Height of the grid (default: 4)
        pit_prob : float
            Probability that a non-start cell contains a pit (default: 0.2)
        allow_climb_without_gold : bool
            Whether climbing without gold ends the episode (default: True)
        seed : int, optional
            Random seed for reproducibility (default: None). When given, it
            seeds the global `random` module generator at construction time.
        """
        self.width = width
        self.height = height
        self.pit_prob = pit_prob
        self.allow_climb_without_gold = allow_climb_without_gold

        if seed is not None:
            # Global seeding: choose_action() draws from the module-level RNG
            random.seed(seed)

    def choose_action(self):
        """Return a randomly chosen action from the Action enum."""
        return random.choice(list(Action))

    def run(self) -> int:
        """
        Run a full episode of random actions until the game ends.

        The board and percept are printed before every action, and once more
        after the episode terminates.

        Returns
        -------
        int
            Sum of the rewards of all steps taken in the episode.
        """
        env = Environment()
        cumulative_reward = 0

        # Initialize the environment using the new parameterized interface
        percept = env.init(
            width=self.width,
            height=self.height,
            pit_prob=self.pit_prob,
            allow_climb_without_gold=self.allow_climb_without_gold
        )

        # Main loop: random actions until terminal state
        while not percept.done:
            env.visualize()
            print('Percept:', percept)
            action = self.choose_action()
            print('\nAction:', action, '\n')
            percept = env.step(action)
            cumulative_reward += percept.reward

        # Final visualization and summary
        env.visualize()
        print('Percept:', percept)
        print('Cumulative reward:', cumulative_reward)

        # Hand the total back so callers can aggregate results across episodes
        return cumulative_reward


In [None]:
# Sample six random actions; no seed is passed, so the output differs between runs
agent = NaiveAgent()
for _ in range(6):
    print(agent.choose_action())


Action.LEFT
Action.FORWARD
Action.SHOOT
Action.GRAB
Action.GRAB
Action.FORWARD


## MovePlanningAgent Definition

In [None]:
@dataclass
class MovePlanningAgent:
    """Agent that explores at random and plans its way home once it holds the gold.

    While exploring it picks among FORWARD, LEFT, RIGHT and SHOOT using ``rng``.
    On glitter it grabs the gold, plans a route back to (1, 1) through cells it
    has already visited, follows that plan, and climbs out at (1, 1).
    """

    width: int = 4
    height: int = 4
    allow_climb_without_gold: bool = True
    pit_prob: float = 0.2

    # runtime state (reset at the start of every run())
    x: int = 1
    y: int = 1
    d: int = 0  # 0:E, 1:S, 2:W, 3:N
    has_gold: bool = False
    visited_safe: Set[Tuple[int, int]] = field(default_factory=lambda: {(1, 1)})
    plan: Deque[str] = field(default_factory=deque)
    rng: random.Random = field(default_factory=random.Random)
    cumulative_reward: int = 0

    # environment is injected at run()
    env: object = None

    def run(self, Environment, Action):
        """Play one episode, printing the board, percept and action at every step.

        Parameters
        ----------
        Environment : callable
            Zero-argument factory for the environment. The created object must
            provide ``init(width=..., height=..., pit_prob=...,
            allow_climb_without_gold=...)``, ``step(action)`` and ``visualize()``.
        Action : enum-like
            Must expose FORWARD, LEFT, RIGHT, SHOOT, GRAB and CLIMB.

        The final percept and the cumulative reward are printed when the
        episode ends; the reward is also left in ``self.cumulative_reward``.
        """
        # initialize episode
        self.env = Environment()
        percept = self.env.init(
            width=self.width,
            height=self.height,
            pit_prob=self.pit_prob,
            allow_climb_without_gold=self.allow_climb_without_gold,
        )

        # Reset per-episode state so the same agent object can be run again.
        self.x, self.y, self.d = 1, 1, 0
        self.has_gold = False
        self.visited_safe = {(1, 1)}
        self.plan.clear()
        self.cumulative_reward = 0

        while not percept.done:
            # 1) Show board and current percept (same as NaiveAgent)
            self.env.visualize()
            print('Percept:', percept)

            # 2) Deterministic reaction to glitter
            if percept.glitter and not self.has_gold:
                print('\nAction:', Action.GRAB, '\n')
                percept = self._act_and_update(Action.GRAB)
                self.has_gold = True
                self._plan_route_home()
                continue

            # 3) Holding the gold at the start square with nothing left to do:
            #    climb out. Checking this on every iteration (rather than only
            #    right after a plan step) also covers gold grabbed at (1, 1)
            #    and an agent that wandered home without a plan.
            if self.has_gold and not self.plan and (self.x, self.y) == (1, 1):
                print('\nAction:', Action.CLIMB, '\n')
                percept = self._act_and_update(Action.CLIMB)
                continue

            # 4) If executing a plan, take the next planned action
            if self.plan:
                action = self._action_from_label(self.plan.popleft(), Action)
                print('\nAction:', action, '\n')
                percept = self._act_and_update(action)
                continue

            # 5) Otherwise: explore (no random Grab/Climb)
            action = self.rng.choice([Action.FORWARD, Action.LEFT, Action.RIGHT, Action.SHOOT])
            print('\nAction:', action, '\n')
            percept = self._act_and_update(action)

        # final board
        try:
            self.env.visualize()
        except Exception as exc:
            # Best effort: still report the final percept and reward below,
            # but say why the board could not be drawn instead of hiding it.
            print('Final visualization failed:', exc)

        print("Percept:", percept)
        print("Cumulative reward:", self.cumulative_reward)

    # ---- helpers ----
    def _plan_route_home(self):
        """Replace ``self.plan`` with the action labels leading back to (1, 1)."""
        if (self.x, self.y) == (1, 1):
            # Already on the exit square: nothing to plan, run() climbs next.
            self.plan = deque()
            return
        route = bfs_shortest_actions(
            (self.x, self.y, self.d), (1, 1), self.visited_safe, self.width, self.height
        )
        # A falsy planner result leaves the plan empty; run() then keeps exploring.
        self.plan = deque(route or [])

    def _act_and_update(self, action):
        """Send ``action`` to the environment and update the agent's bookkeeping.

        Adds the percept's reward to ``cumulative_reward``, updates the heading
        on LEFT/RIGHT, advances the position on a FORWARD that did not bump,
        and records the current cell as visited unless the episode ended.
        Returns the percept produced by ``env.step``.
        """
        p = self.env.step(action)
        self.cumulative_reward += p.reward

        # Fall back to str(action) so plain-string actions still dispatch.
        name = getattr(action, "name", str(action))
        if name == "LEFT":
            self.d = dir_left(self.d)
        elif name == "RIGHT":
            self.d = dir_right(self.d)
        elif name == "FORWARD":
            # Only advance on no-bump
            if not p.bump:
                self.x += DX[self.d]
                self.y += DY[self.d]

        # A terminal percept may mean the agent died here, so the cell is
        # only recorded when the episode is still running.
        if not p.done:
            self.visited_safe.add((self.x, self.y))
        return p

    @staticmethod
    def _action_from_label(label: str, Action):
        """Map a planner label ('Forward', 'TurnLeft', 'TurnRight') to an Action member.

        Raises KeyError for any other label.
        """
        return {
            "Forward": Action.FORWARD,
            "TurnLeft": Action.LEFT,
            "TurnRight": Action.RIGHT,
        }[label]

# Visualization Of The Game State

## Sanity tests

In [None]:
# Build one 4x4 world with pit probability 0.2 and draw it, to eyeball the
# text rendering. The initial percept is kept in `p1`.
env = Environment()
# Alternative, pit-free world:
# p0 = env.init(width=4, height=4, pit_prob=0.0, allow_climb_without_gold=True) # Safe world (no pits)
p1 = env.init(width=4, height=4, pit_prob=0.2, allow_climb_without_gold=True)
env.visualize()


|    |    |P   |    |
|    |    |    |PW  |
|    |    |    |    |
|A→  |P   |    |PG  |


In [None]:
# Path with pits disabled (should sometimes succeed quickly); uncomment the two lines below to run:

# move_planning_agent_happy = MovePlanningAgent(pit_prob=0.0)
# move_planning_agent_happy.run(Environment, Action)

In [None]:
# Planner sanity (inline harness): a straight corridor of safe cells along
# y=1, starting at (1, 1) with heading d=0 (east), should need no turns.
safe_line = {(1, 1), (2, 1), (3, 1), (4, 1)}
plan = bfs_shortest_actions(start=(1, 1, 0), goal_cell=(4, 1),
                            safe_cells=safe_line, width=4, height=4)
print(plan)
# Enforce the expectation instead of leaving it to visual inspection.
assert plan == ['Forward', 'Forward', 'Forward'], plan

['Forward', 'Forward', 'Forward']


In [None]:
# Turn cost check: from (1, 1) with heading d=1 (south), the goal (2, 1) lies
# to the east, so the shortest plan is one left turn followed by one step.
# (Two right turns would leave the agent facing north, not east, so
# ['TurnRight', 'TurnRight', 'Forward'] is not a valid answer.)
plan = bfs_shortest_actions(start=(1, 1, 1), goal_cell=(2, 1),
                            safe_cells={(1, 1), (2, 1)}, width=4, height=4)
print(plan)
assert plan == ['TurnLeft', 'Forward'], plan


['TurnLeft', 'Forward']


## Playing Game

In [None]:
# NAIVE AGENT (disabled; uncomment to play an episode with purely random actions)

# naive_agent = NaiveAgent(width=4, height=4, pit_prob=0.2, allow_climb_without_gold=False)
# naive_agent.run()

#----------------------------------------------------------------------------------------------------

# MOVE PLANNING AGENT
# Plays one episode on a 4x4 board with pit probability 0.2. run() prints the
# board, the percept and each chosen action, then the cumulative reward.
# No seed is set here, so the world and the outcome differ between runs.

move_planning_agent = MovePlanningAgent(width=4, height=4, pit_prob=0.2, allow_climb_without_gold=False)
move_planning_agent.run(Environment, Action)


|    |    |    |W   |
|    |    |    |    |
|P   |    |G   |    |
|A→  |    |    |    |
Percept: Percept(t=0, Signals=[Breeze], Reward=0, Done=False)

Action: Action.LEFT 

|    |    |    |W   |
|    |    |    |    |
|P   |    |G   |    |
|A↑  |    |    |    |
Percept: Percept(t=1, Signals=[Breeze], Reward=-1, Done=False)

Action: Action.LEFT 

|    |    |    |W   |
|    |    |    |    |
|P   |    |G   |    |
|A←  |    |    |    |
Percept: Percept(t=2, Signals=[Breeze], Reward=-1, Done=False)

Action: Action.RIGHT 

|    |    |    |W   |
|    |    |    |    |
|P   |    |G   |    |
|A↑  |    |    |    |
Percept: Percept(t=3, Signals=[Breeze], Reward=-1, Done=False)

Action: Action.FORWARD 

|    |    |    |W   |
|    |    |    |    |
|A↑P |    |G   |    |
|    |    |    |    |
Percept: Percept(t=4, Signals=[Breeze], Reward=-1001, Done=True)
Cumulative reward: -1004
