In [1]:
import gymnasium as gym
from heapq import heappush, heappop
import numpy as np
import random
from gymnasium import spaces
from collections import deque
from coverage_env import CoverageEnv
import numpy as np


In [2]:
def astar(grid, start, goal):
    """Find a shortest path between two cells of a grid with A*.

    The search expands the four axis-aligned neighbours of a cell at unit cost
    and uses the Manhattan distance to ``goal`` as its heuristic.

    Args:
        grid: 2-D array exposing ``.shape`` and ``grid[i, j]`` indexing. A cell
            is traversable when its value equals 0.
        start: ``(row, col)`` of the starting cell. Any length-2 sequence of
            integers (tuple, list, 1-D array) is accepted.
        goal: ``(row, col)`` of the destination cell, same accepted forms.

    Returns:
        A list of ``(row, col)`` tuples from ``start`` to ``goal`` inclusive
        (``[start]`` when both are the same cell), or ``None`` when the search
        exhausts every reachable cell without popping ``goal``.
    """
    # Normalise both endpoints to plain int tuples: cells are used as dict keys
    # and compared with ``==``, so a list/array start would be unhashable and a
    # list goal would never compare equal to the tuple cells generated below.
    start = tuple(int(c) for c in start)
    goal = tuple(int(c) for c in goal)

    H, W = grid.shape
    # Heap entries are (f, g, cell, parent). A cell is only re-pushed with a
    # strictly smaller g, so (g, cell) is unique and tuple comparison never
    # reaches the ``parent`` slot (which may be None).
    open_set = [(abs(start[0] - goal[0]) + abs(start[1] - goal[1]), 0, start, None)]
    came_from = {}          # expanded cell -> parent it was reached from
    g_score = {start: 0}    # best known cost from start to each discovered cell
    while open_set:
        _, g, current, parent = heappop(open_set)
        if current == goal:
            # Rebuild the path by walking parents back to the start, whose
            # recorded parent is None.
            path = [current]
            while parent is not None:
                path.append(parent)
                parent = came_from[parent]
            path.reverse()
            return path
        if current in came_from:
            # Stale heap entry: this cell was already expanded.
            continue
        came_from[current] = parent
        ci, cj = current
        for di, dj in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            ni, nj = ci + di, cj + dj
            if 0 <= ni < H and 0 <= nj < W and grid[ni, nj] == 0:
                neigh = (ni, nj)
                tentative_g = g + 1
                if tentative_g < g_score.get(neigh, float("inf")):
                    g_score[neigh] = tentative_g
                    h = abs(ni - goal[0]) + abs(nj - goal[1])
                    heappush(open_set, (tentative_g + h, tentative_g, neigh, current))
    return None


In [3]:
def coverage_with_astar(env):
    """Run one episode of a greedy nearest-target coverage policy driven by A*.

    After resetting ``env``, the agent repeatedly picks the not-yet-visited
    target closest to it in Manhattan distance, plans a path to it with
    ``astar`` and follows that path one ``env.step`` call at a time. The
    episode ends when no target is left to attempt or when the environment
    reports ``done``/``truncated``.

    A target with no path is reported and skipped, so the remaining reachable
    targets are still covered instead of the whole episode being abandoned.

    Args:
        env: Environment exposing ``reset()``, ``step(action)``, ``grid``,
            ``agent_pos``, ``targets`` (a set of cells) and ``visited``.
            ``step`` must return ``(obs, reward, done, truncated, info)``.

    Returns:
        The sum of the rewards returned by ``env.step`` during the episode.
    """
    env.reset()
    total_reward = 0
    # Targets given up on during this episode (no path, or nothing to do).
    # NOTE(review): assumes env.grid does not change mid-episode, so a target
    # found unreachable once stays unreachable — confirm against CoverageEnv.
    skipped = set()

    while True:
        remaining = [cell for cell in env.targets - set(env.visited)
                     if cell not in skipped]
        if not remaining:
            break

        ai, aj = env.agent_pos
        # min() returns the first minimal element, exactly like taking [0]
        # after a stable sort, without sorting the whole list.
        goal = min(remaining, key=lambda cell: abs(ai - cell[0]) + abs(aj - cell[1]))

        path = astar(env.grid, env.agent_pos, goal)
        if path is None:
            print(f"Alvo {goal} inacessível")
            skipped.add(goal)
            continue
        if len(path) < 2:
            # The agent already stands on the goal but the environment does not
            # list it as visited; there is no move to make, so drop the target
            # rather than replanning the same empty path forever.
            skipped.add(goal)
            continue

        for next_cell in path[1:]:
            ci, cj = env.agent_pos
            ni, nj = next_cell
            # Action encoding used here: 0 = row + 1, 1 = row - 1,
            # 2 = col + 1, 3 = col - 1.
            # NOTE(review): confirm this matches CoverageEnv's action space.
            if ni > ci:
                action = 0
            elif ni < ci:
                action = 1
            elif nj > cj:
                action = 2
            else:
                action = 3

            _, reward, done, truncated, _ = env.step(action)
            total_reward += reward
            if done or truncated:
                return total_reward

    return total_reward


In [5]:
# Benchmark setup: number of episodes per level and the curriculum levels swept.
num_runs = 20
levels = range(5)

# Summary statistics of the episode returns, keyed by curriculum level.
mean_rewards, max_rewards, min_rewards = {}, {}, {}

for level in levels:
    rewards = []
    for run in range(num_runs):
        # A fresh environment per episode; coverage_with_astar resets it itself.
        env = CoverageEnv(curriculum=level)
        rewards.append(coverage_with_astar(env))
    mean_rewards[level] = np.mean(rewards)
    max_rewards[level] = np.max(rewards)
    min_rewards[level] = np.min(rewards)

# Print each statistic under its own heading, one line per level.
report_sections = (
    ("\nMean A* rewards per curriculum level:", mean_rewards),
    ("\nMax A* rewards per curriculum level:", max_rewards),
    ("\nMin A* rewards per curriculum level:", min_rewards),
)
for heading, per_level in report_sections:
    print(heading)
    for level, value in per_level.items():
        print(f"  Level {level}: {value:.2f}")



Mean A* rewards per curriculum level:
  Level 0: 42.25
  Level 1: 45.50
  Level 2: 40.45
  Level 3: 40.65
  Level 4: 30.90

Max A* rewards per curriculum level:
  Level 0: 45.00
  Level 1: 48.00
  Level 2: 44.00
  Level 3: 44.00
  Level 4: 36.00

Min A* rewards per curriculum level:
  Level 0: 40.00
  Level 1: 43.00
  Level 2: 37.00
  Level 3: 38.00
  Level 4: 24.00
