In [None]:
def cleaning_policy(robot_pos, dirt_map):
    """
    Baseline policy: clean when standing on dirt, otherwise wander at random.

    Args:
        robot_pos: [row, col] - Current position of the robot
        dirt_map: 2D numpy array where 1 means dirt

    Returns:
        action: Integer 0-4 where:
            0: Move Up
            1: Move Right
            2: Move Down
            3: Move Left
            4: Clean
    """
    clean_action = 4
    standing_on_dirt = dirt_map[tuple(robot_pos)] == 1

    # Nothing to clean here: pick one of the four moves (0..3) uniformly.
    # The upper bound of randint is exclusive, so Clean (4) is never drawn.
    if not standing_on_dirt:
        return np.random.randint(0, 4)

    return clean_action

Trial 1

In [None]:
import numpy as np


def cleaning_policy(robot_pos, dirt_map):
    """
    Greedy policy that walks toward the closest dirty tile.

    Behaviour:
      - Standing on dirt: clean it.
      - No dirt anywhere: return Up as an arbitrary harmless action.
      - Otherwise: take one step toward the dirty tile with the smallest
        Manhattan distance (the first such tile in row-major order wins).
        The step is vertical only when the row gap is strictly larger than
        the column gap; on equal gaps the step is horizontal.

    Args:
        robot_pos: [row, col] - Current position of the robot
        dirt_map: 2D numpy array where 1 means dirt

    Returns:
        action: 0=Up, 1=Right, 2=Down, 3=Left, 4=Clean
    """
    UP, RIGHT, DOWN, LEFT, CLEAN = 0, 1, 2, 3, 4
    row, col = robot_pos

    if dirt_map[row, col] == 1:
        return CLEAN

    dirty_cells = np.argwhere(dirt_map == 1)
    if dirty_cells.size == 0:
        # No dirt left - any action is as good as another.
        return UP

    # Signed row/column offsets from the robot to every dirty tile.
    row_gaps = dirty_cells[:, 0] - row
    col_gaps = dirty_cells[:, 1] - col
    closest = np.argmin(np.abs(row_gaps) + np.abs(col_gaps))
    d_row, d_col = row_gaps[closest], col_gaps[closest]

    # Close the larger gap first; equal gaps fall through to the horizontal move.
    if abs(d_row) > abs(d_col):
        return DOWN if d_row > 0 else UP
    if d_col != 0:
        return RIGHT if d_col > 0 else LEFT

    # Both gaps are zero: only possible if the robot is on the target tile,
    # which the first check already handles. Kept as a safeguard.
    return CLEAN


# Took 55 steps

Trial 2

In [None]:
import numpy as np

# Dirt tile the robot is currently heading for, remembered between calls so
# the robot commits to one tile instead of re-picking a target every step.
# Stored as a (row, col) tuple, or None when no target is selected.
_last_target = None


def cleaning_policy(robot_pos, dirt_map):
    """
    Nearest-dirt policy with a remembered target.

    Behaviour:
      - Standing on dirt: clean it and forget the current target.
      - Otherwise keep moving toward the remembered target while it is
        still dirty.
      - When there is no usable target, pick the dirty tile with the
        smallest Manhattan distance (first in row-major order on ties).
      - No dirt anywhere: forget the target and return Up as an arbitrary
        harmless action.

    The target lives in the module-level ``_last_target``, so it survives
    between simulation runs. A target that lies outside the current grid
    (e.g. left over from a run on a larger map) is discarded rather than
    indexed, which would otherwise raise ``IndexError``.

    Args:
        robot_pos: [row, col] - Current position of the robot
        dirt_map: 2D numpy array where 1 means dirt

    Returns:
        action: 0=Up, 1=Right, 2=Down, 3=Left, 4=Clean
    """
    global _last_target
    r, c = robot_pos

    # Clean if on dirt
    if dirt_map[r, c] == 1:
        _last_target = None
        return 4

    # A remembered target is only usable if it fits inside this grid and is
    # still dirty; the bounds checks come first so the lookup cannot fail.
    target_unusable = (
        _last_target is None
        or _last_target[0] >= dirt_map.shape[0]
        or _last_target[1] >= dirt_map.shape[1]
        or dirt_map[_last_target[0], _last_target[1]] != 1
    )
    if target_unusable:
        dirt_positions = np.argwhere(dirt_map == 1)
        if len(dirt_positions) == 0:
            _last_target = None
            return 0  # nothing left
        dists = np.abs(dirt_positions[:, 0] - r) + np.abs(dirt_positions[:, 1] - c)
        _last_target = tuple(dirt_positions[np.argmin(dists)])

    tr, tc = _last_target
    dr, dc = tr - r, tc - c

    # Close the larger gap first; on equal gaps the vertical move wins.
    if abs(dr) >= abs(dc) and dr != 0:
        return 2 if dr > 0 else 0  # down/up
    elif dc != 0:
        return 1 if dc > 0 else 3  # right/left
    else:
        # Robot is on the target tile but it is not dirty. The checks above
        # should make this unreachable; kept as a safeguard.
        _last_target = None
        return 4


# This took 51 steps