# Notebook for experimenting with the custom BrainLesionEnv Gymnasium environment

In [None]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np

In [3]:
class BrainLesionEnv(gym.Env):
    """Grid-world environment for locating a lesion in a 2-D image.

    The image is divided into ``grid_size`` x ``grid_size`` square blocks.
    The agent occupies one block at a time, starts in the top-left block and
    can stay, move one block down, or move one block right.  The observation
    is the image patch under the agent scaled by 1/255; the reward depends on
    whether the agent's block overlaps any non-zero pixel of ``mask``.

    Args:
        image: 2-D array of pixel intensities.  It is divided by 255 to build
            observations, so values are presumably in [0, 255] -- confirm
            against the data loader.
        mask: 2-D array of the same shape as ``image``; non-zero marks lesion.
        grid_size: Number of blocks per side.  The block size is derived from
            ``image.shape[0]`` only, so the image is assumed to be square.
    """

    metadata = {"render_modes": ["human"], "render_fps": 4}

    def __init__(self, image, mask, grid_size=4):
        super().__init__()

        self.image = image.astype(np.float32)
        self.mask = mask.astype(np.uint8)
        self.grid_size = grid_size
        # Side length of one block in pixels, e.g. 240 // 4 = 60.
        self.block_size = image.shape[0] // grid_size

        # Actions: 0 = stay, 1 = move down, 2 = move right
        self.action_space = spaces.Discrete(3)

        # Observations: one grayscale block, normalized to [0, 1]
        self.observation_space = spaces.Box(
            low=0, high=1,
            shape=(self.block_size, self.block_size),
            dtype=np.float32
        )

        # [row, col] of the agent's block in grid coordinates.
        self.agent_pos = [0, 0]
        self.current_step = 0
        self.max_steps = 20  # like in the paper

    def reset(self, seed=None, options=None):
        """Put the agent back in the top-left block and restart the step count.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.agent_pos = [0, 0]  # top-left corner
        self.current_step = 0
        obs = self._get_obs()
        info = {}
        return obs, info

    def step(self, action):
        """Apply ``action``, then score and observe the resulting position.

        Moves that would leave the grid are ignored (the agent stays put), but
        the reward is still computed from the requested action.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
        """
        self.current_step += 1

        # Apply action (respect grid boundaries)
        if action == 1 and self.agent_pos[0] < self.grid_size - 1:
            self.agent_pos[0] += 1  # move down
        elif action == 2 and self.agent_pos[1] < self.grid_size - 1:
            self.agent_pos[1] += 1  # move right

        reward = self._compute_reward(action)
        obs = self._get_obs()

        # Episode ends after a fixed number of steps.
        # NOTE(review): Gymnasium conventionally reports step limits through
        # `truncated`; kept as `terminated` so existing callers that only
        # check the third return value keep working.
        terminated = self.current_step >= self.max_steps
        truncated = False
        info = {}

        return obs, reward, terminated, truncated, info

    def _get_obs(self):
        """Return the image block under the agent, divided by 255."""
        r0 = self.agent_pos[0] * self.block_size
        c0 = self.agent_pos[1] * self.block_size
        patch = self.image[r0:r0+self.block_size, c0:c0+self.block_size]
        return patch / 255.0

    def _compute_reward(self, action):
        """Score ``action`` given the agent's current (post-move) block.

        Intended to follow the Stember & Shalu reward scheme cited by the
        original author:

        * stay on a lesion block        -> +1.0
        * move and end on a lesion block -> +1.0
        * move and end off the lesion    -> -0.5
        * stay off the lesion            -> -2.0

        Any other action value yields 0.0.  A move blocked by the grid
        boundary is scored like any other move action.
        """
        r0 = self.agent_pos[0] * self.block_size
        c0 = self.agent_pos[1] * self.block_size
        patch_mask = self.mask[r0:r0+self.block_size, c0:c0+self.block_size]
        # The block counts as "inside" if it overlaps any lesion pixel.
        inside = bool(np.any(patch_mask > 0))

        if action == 0:
            return 1.0 if inside else -2.0
        if action in (1, 2):
            # Previously the "entered the lesion" case was nested under a
            # `not inside` guard and therefore unreachable, so entering the
            # lesion silently scored 0.0.
            return 1.0 if inside else -0.5
        return 0.0

    def render(self):
        """Print the agent's grid position (debugging aid)."""
        print(f"Agent position: {self.agent_pos}")


In [5]:
# Synthetic test data: a random 240x240 grayscale image and a binary mask
# whose only non-zero region is a 60x60 square standing in for a lesion.
img = np.random.randint(0, 256, (240, 240), dtype=np.uint8)
mask = np.zeros_like(img)
mask[120:180, 120:180] = 1  # fake "tumor" region

env = BrainLesionEnv(img, mask)
obs, info = env.reset()

# Random-policy rollout: at most 10 steps, stopping early if the episode ends.
for _ in range(10):
    action = env.action_space.sample()
    obs, reward, done, trunc, info = env.step(action)
    print(f"Action {action} → Reward {reward}")
    if not done:
        continue
    break


Action 0 → Reward -2.0
Action 0 → Reward -2.0
Action 2 → Reward -0.5
Action 0 → Reward -2.0
Action 0 → Reward -2.0
Action 0 → Reward -2.0
Action 0 → Reward -2.0
Action 2 → Reward -0.5
Action 2 → Reward -0.5
Action 2 → Reward -0.5
