In [11]:
import gymnasium as gym
from heapq import heappush, heappop
import numpy as np
import random
from gymnasium import spaces
from collections import deque

In [12]:
def bfs_reachable(grid, start, targets):
    """
    Check whether every target cell can be reached from ``start``.

    Runs a 4-connected breadth-first search over ``grid``, where 0 marks a
    free cell and any other value blocks movement.

    Args:
        grid: 2-D numpy array of cell codes (0 = free, non-zero = obstacle).
        start: (row, col) tuple of the starting cell. The start cell itself is
            not checked against the grid; it is always treated as reached.
        targets: iterable of (row, col) tuples that must all be reached.

    Returns:
        True if all targets are reachable from ``start`` (vacuously True when
        ``targets`` is empty), False otherwise.
    """
    # Build the lookup set once; cells are removed as the search reaches them.
    pending = set(targets)
    if not pending:
        return True

    H, W = grid.shape
    visited = np.zeros_like(grid, dtype=bool)
    visited[start] = True
    queue = deque([start])
    while queue:
        i, j = queue.popleft()
        pending.discard((i, j))
        if not pending:
            # Every target has been dequeued at least once: stop early.
            return True
        for di, dj in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            ni, nj = i + di, j + dj
            if 0 <= ni < H and 0 <= nj < W and not visited[ni, nj] and grid[ni, nj] == 0:
                # Mark on enqueue so a cell is never queued twice.
                visited[ni, nj] = True
                queue.append((ni, nj))
    return False


In [13]:
class CoverageEnv(gym.Env):
    """
    Grid-world coverage task on an 8x8 board.

    Each episode places random obstacle shapes on the board, picks a 3x3
    target block (minus any obstacle cells) and a random free start cell from
    which every target is reachable. The agent moves in the four cardinal
    directions and the episode terminates once every target cell has been
    visited, or is truncated after ``max_steps`` steps.

    Rewards per step:
        +2.0  first visit to a target cell
         0.0  first visit to a non-target free cell
        -1.0  ending the step on an already visited cell (this includes
              blocked moves, because the agent stays where it was)
        +30.0 added on the step that completes the coverage
    """

    # NOTE(review): Gymnasium documents the key "render_modes"; "render.modes"
    # is the legacy Gym spelling. Left untouched here -- confirm before renaming.
    metadata = {"render.modes": ["human"]}

    def __init__(self, curriculum_max=3, max_steps=200, seed=None):
        """
        Args:
            curriculum_max: highest curriculum level; the level grows by one
                on every reset until it reaches this cap.
            max_steps: number of steps after which an episode is truncated.
            seed: value passed to ``seed()`` to seed the global RNGs.
        """
        super().__init__()
        self.H, self.W = 8, 8
        self.curriculum_level = 0
        self.curriculum_max = curriculum_max
        self.max_steps = max_steps

        # seeding for reproducibility
        self.seed(seed)

        # Action: down, up, right, left
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(0.0, 1.0, shape=(5, self.H, self.W), dtype=np.float32)

        # Fixed obstacle shape library; higher curriculum levels unlock
        # more entries (see reset()).
        self.shape_library = [
            np.array([[1]]),                # single cell
            np.ones((1, 3), dtype=int),     # horizontal bar
            np.ones((2, 2), dtype=int),     # 2x2 block
            np.array([[1, 1, 1],            # closed ring (centre cell is enclosed)
                      [1, 0, 1],
                      [1, 1, 1]]),
            np.array([[1, 1, 0],            # L-shape
                      [1, 0, 0]])
        ]

    def seed(self, seed=None):
        """
        Seed the global ``numpy.random`` and ``random`` generators used to
        build layouts. Returns the seed wrapped in a list.
        """
        np.random.seed(seed)
        random.seed(seed)
        return [seed]

    def reset(self, *, seed=None, options=None):
        """
        Start a new episode; returns (obs, info) with obs of shape C x H x W.

        Channels:
         0 = free space
         1 = obstacles
         2 = agent location
         3 = target area
         4 = visited mask (only the start cell is set at reset)

        Args:
            seed: when given, the RNGs are re-seeded with this value before
                the layout is sampled; when None the current random stream
                simply continues, so successive resets produce new layouts.
            options: accepted for Gymnasium API compatibility; unused.
        """
        super().reset(seed=seed)
        # Honour the caller's seed. Re-seeding unconditionally with a fixed
        # constant would restart the same random stream on every episode.
        if seed is not None:
            self.seed(seed)

        self.curriculum_level = min(self.curriculum_max, self.curriculum_level + 1)

        allowed = self.shape_library[: self.curriculum_level + 1]
        # At least one shape, even at curriculum level 0 (randint needs low < high).
        max_shapes = max(1, self.curriculum_level * 2)

        # Rejection sampling: redraw until the layout has at least one target
        # and all targets are reachable from the start cell.
        while True:
            num_shapes = np.random.randint(1, max_shapes + 1)
            grid = np.zeros((self.H, self.W), dtype=int)
            for _ in range(num_shapes):
                shape = random.choice(allowed)
                sh, sw = shape.shape
                i = np.random.randint(0, self.H - sh + 1)
                j = np.random.randint(0, self.W - sw + 1)
                # Skip the shape if it would overlap an already placed one.
                if not np.any(grid[i:i+sh, j:j+sw] & shape):
                    grid[i:i+sh, j:j+sw] |= shape

            # Targets: the free cells of a randomly positioned 3x3 block.
            ti = np.random.randint(0, self.H - 3 + 1)
            tj = np.random.randint(0, self.W - 3 + 1)
            full_block = [(ti+di, tj+dj) for di in range(3) for dj in range(3)]
            targets = [(i, j) for (i, j) in full_block if grid[i, j] == 0]
            if not targets:
                continue

            free_cells = list(zip(*np.where(grid == 0)))
            # Plain Python ints keep positions hashable and printable
            # without numpy scalar wrappers.
            start = tuple(int(c) for c in random.choice(free_cells))
            if bfs_reachable(grid, start, targets):
                break

        self.grid = grid
        self.targets = set(targets)
        self.agent_pos = start
        self.visited = {start}
        self.steps = 0

        return self._get_obs(), {}

    def _get_obs(self):
        """Build the 5-channel float32 observation described in reset()."""
        C = 5
        state = np.zeros((C, self.H, self.W), dtype=np.float32)

        # channel 0: free space (grid==0)
        state[0, :, :] = (self.grid == 0).astype(np.float32)
        # channel 1: obstacles (grid!=0)
        state[1, :, :] = (self.grid != 0).astype(np.float32)
        # channel 2: agent location
        i, j = self.agent_pos
        state[2, i, j] = 1.0
        # channel 3: target area
        for (ti, tj) in self.targets:
            state[3, ti, tj] = 1.0
        # channel 4: cells visited so far
        for vi, vj in self.visited:
            state[4, vi, vj] = 1.0

        return state

    def step(self, action):
        """
        Apply one move and return (obs, reward, terminated, truncated, info).

        Args:
            action: 0 = down, 1 = up, 2 = right, 3 = left.
        """
        moves = {0: (1, 0), 1: (-1, 0), 2: (0, 1), 3: (0, -1)}
        i, j = self.agent_pos
        di, dj = moves[action]
        ni, nj = i + di, j + dj

        # Move only onto in-bounds free cells; otherwise stay in place.
        if 0 <= ni < self.H and 0 <= nj < self.W and self.grid[ni, nj] == 0:
            self.agent_pos = (ni, nj)

        if self.agent_pos in self.visited:
            # Revisit (or blocked move that left the agent where it was).
            reward = -1.0
        else:
            reward = 2.0 if self.agent_pos in self.targets else 0.0
            self.visited.add(self.agent_pos)

        self.steps += 1

        # Done when every target cell has been visited.
        terminated = self.targets <= self.visited
        truncated = (self.steps >= self.max_steps)
        if terminated:
            # Terminal bonus
            reward += 30.0

        return self._get_obs(), reward, terminated, truncated, {}

    def render(self, mode="human"):
        """
        Print the board: '.' free, 'T' target, '#' obstacle, 'A' agent.
        Later symbols overwrite earlier ones on the same cell.
        """
        disp = np.full((self.H, self.W), '.', dtype=str)
        for (i, j) in self.targets:
            disp[i, j] = 'T'
        for (i, j) in zip(*np.where(self.grid == 1)):
            disp[i, j] = '#'
        ai, aj = self.agent_pos
        disp[ai, aj] = 'A'
        print("\n".join("".join(row) for row in disp))
        print("\n")

    def close(self):
        """No resources to release."""
        pass


In [14]:
def astar(grid, start, goal):
    """
    A* search for a shortest 4-connected path on a grid.

    Cells equal to 0 are walkable; the Manhattan distance to ``goal`` is the
    heuristic and every move costs 1.

    Args:
        grid: 2-D numpy array (0 = free, anything else = blocked).
        start: (row, col) tuple where the search begins.
        goal: (row, col) tuple to reach.

    Returns:
        List of (row, col) cells from ``start`` to ``goal`` inclusive, or
        None when the goal cannot be reached.
    """
    n_rows, n_cols = grid.shape
    unseen_cost = 1e9  # stand-in cost for cells not discovered yet
    directions = [(1, 0), (-1, 0), (0, 1), (0, -1)]

    def estimate(r, c):
        # Manhattan distance to the goal.
        return abs(r - goal[0]) + abs(c - goal[1])

    # Heap entries: (f = g + h, g, cell, predecessor)
    frontier = [(estimate(start[0], start[1]), 0, start, None)]
    predecessor = {}          # filled when a cell is expanded
    best_cost = {start: 0}    # cheapest known cost to each discovered cell

    while frontier:
        _, cost, cell, prev = heappop(frontier)

        if cell == goal:
            # Rebuild the path by walking predecessors back to the start.
            trail = [cell]
            while prev is not None:
                trail.append(prev)
                prev = predecessor[prev]
            trail.reverse()
            return trail

        if cell in predecessor:
            # Stale heap entry: this cell was already expanded.
            continue
        predecessor[cell] = prev

        row, col = cell
        for d_row, d_col in directions:
            n_row, n_col = row + d_row, col + d_col
            if not (0 <= n_row < n_rows and 0 <= n_col < n_cols):
                continue
            if grid[n_row, n_col] != 0:
                continue
            candidate = (n_row, n_col)
            new_cost = cost + 1
            if new_cost >= best_cost.get(candidate, unseen_cost):
                continue
            best_cost[candidate] = new_cost
            heappush(frontier, (new_cost + estimate(n_row, n_col), new_cost, candidate, cell))

    return None

def coverage_with_astar(env, render=True):
    """
    Run one episode with a greedy A* baseline and print the total reward.

    The environment is reset, then the agent repeatedly picks the unvisited
    target with the smallest Manhattan distance, plans a path to it with
    ``astar`` and follows that path step by step, until every target is
    visited, the episode ends, or a target turns out to be unreachable.

    Args:
        env: environment exposing ``reset()``, ``step(action)``, ``render()``
            and the attributes ``grid``, ``targets``, ``visited`` and
            ``agent_pos`` (as ``CoverageEnv`` does).
        render: when True (default) the board is rendered after every step.
    """
    env.reset()
    total_reward = 0
    # Initialised up front so the loop condition is always defined, even if
    # no step is ever taken.
    terminated = truncated = False

    while not (terminated or truncated):
        # Pick the closest target that has not been visited yet.
        remaining = list(env.targets - env.visited)
        if not remaining:
            break
        remaining.sort(key=lambda cell: abs(env.agent_pos[0]-cell[0]) + abs(env.agent_pos[1]-cell[1]))
        goal = remaining[0]

        path = astar(env.grid, env.agent_pos, goal)
        if path is None:
            print(f"Alvo {goal} inacessível")
            break
        if len(path) < 2:
            # The agent already stands on the goal but it is not marked as
            # visited; no move would change that, so stop instead of spinning.
            break

        # Follow the planned path up to the goal.
        for next_cell in path[1:]:
            ci, cj = env.agent_pos
            ni, nj = next_cell
            if ni > ci:
                action = 0      # down
            elif ni < ci:
                action = 1      # up
            elif nj > cj:
                action = 2      # right
            else:
                action = 3      # left

            _, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
            if render:
                env.render()
            if terminated or truncated:
                break

    print("Recompensa total (A*):", total_reward)

In [15]:
# Build a seeded environment and run one episode of the A*-driven coverage baseline.
env = CoverageEnv(seed=42)
coverage_with_astar(env)


..A.....
........
....TTT.
....#TT.
....TTT.
........
........
........


...A....
........
....TTT.
....#TT.
....TTT.
........
........
........


....A...
........
....TTT.
....#TT.
....TTT.
........
........
........


........
....A...
....TTT.
....#TT.
....TTT.
........
........
........


........
........
....ATT.
....#TT.
....TTT.
........
........
........


........
........
....TAT.
....#TT.
....TTT.
........
........
........


........
........
....TTA.
....#TT.
....TTT.
........
........
........


........
........
....TTT.
....#TA.
....TTT.
........
........
........


........
........
....TTT.
....#TT.
....TTA.
........
........
........


........
........
....TTT.
....#TT.
....TAT.
........
........
........


........
........
....TTT.
....#TT.
....ATT.
........
........
........


........
........
....TTT.
....#TT.
....TAT.
........
........
........


........
........
....TTT.
....#AT.
....TTT.
........
........
........


Recompensa total (A*): 45.0
