In [4]:
pip install pygame

Note: you may need to restart the kernel to use updated packages.


In [1]:
pip install imageio

Note: you may need to restart the kernel to use updated packages.


In [2]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pygame
import math

class CutSquareEnv(gym.Env):
    """Grid environment in which a filled square is carved down to a circle.

    The observation is a ``grid_size x grid_size`` array of 0/1 cells. The
    cells ``[square_top:square_bottom, square_left:square_right]`` start
    filled (1). An action is an ``(x, y)`` pair (column, row); the grid itself
    is indexed ``grid[y, x]``.

    Rewards handed out by :meth:`step`:

    * ``-5``  the chosen cell is already empty (nothing changes);
    * ``-4``  the chosen cell is filled but is not an edge cell (nothing changes);
    * ``+5``  an edge cell strictly outside the circle is removed (2 + 3);
    * ``-1``  an edge cell inside or on the circle is removed (2 - 3).

    The episode terminates once no filled cell lies strictly outside the
    circle of radius ``circle_radius`` around ``center``.

    NOTE(review): ``center`` is the midpoint of the slice bounds, while the
    filled cells occupy indices ``left .. right-1``; the circle is therefore
    half a cell off the middle of the occupied cells. Left unchanged here.
    """

    def __init__(self, grid_size=50, square_top=10, square_bottom=40, square_left=10, square_right=40):
        """Store the geometry, open the pygame window and build the initial grid.

        Args:
            grid_size: Side length of the (square) grid, in cells.
            square_top, square_bottom: Row slice bounds of the filled square.
            square_left, square_right: Column slice bounds of the filled square.
        """
        super().__init__()
        self.grid_size = grid_size
        self.observation_space = spaces.Box(low=0, high=1, shape=(grid_size, grid_size), dtype=np.int32)
        self.action_space = spaces.MultiDiscrete([grid_size, grid_size])

        # Square and inscribed circle parameters
        self.square_top = square_top
        self.square_bottom = square_bottom
        self.square_left = square_left
        self.square_right = square_right
        self.center = ((square_left + square_right) / 2, (square_top + square_bottom) / 2)
        # Half the horizontal extent of the square; the vertical extent is not consulted.
        self.circle_radius = (square_right - square_left) / 2

        # Pygame rendering setup (a window is opened as soon as the env is created)
        pygame.init()
        self.cell_size = 10
        self.screen = pygame.display.set_mode((grid_size * self.cell_size, grid_size * self.cell_size))
        pygame.display.set_caption("Cut Square Environment")
        self.clock = pygame.time.Clock()

        self.reset()

    def reset(self, seed=None, options=None):
        """Restore the full square and return ``(observation, info)``.

        Args:
            seed: Optional seed, forwarded to ``gym.Env.reset`` so the
                environment's RNG is seeded as the Gymnasium API expects.
            options: Accepted for API compatibility; not used.
        """
        # Previously the seed was accepted but silently dropped.
        super().reset(seed=seed)
        self.grid = np.zeros((self.grid_size, self.grid_size), dtype=np.int32)
        self.grid[self.square_top:self.square_bottom, self.square_left:self.square_right] = 1  # Fill the square
        self.steps = 0
        return self.grid.copy(), {}

    def _outside_circle(self, x, y):
        """Return True when cell ``(x, y)`` lies strictly outside the target circle.

        This is the single definition of "outside" used for rewards and
        rendering; it matches the ``dists > circle_radius`` test in
        :meth:`_is_circle_formed`, so a cell exactly on the circle counts as
        part of the circle everywhere.
        """
        return math.hypot(x - self.center[0], y - self.center[1]) > self.circle_radius

    def step(self, action):
        """Try to remove the cell addressed by ``action`` and score the attempt.

        Args:
            action: ``(x, y)`` pair, column then row.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``truncated`` is always ``False`` and ``info`` is an empty dict.
        """
        x, y = action
        reward = 0
        info = {}

        if self.grid[y, x] == 0:
            reward = -5  # Heavy penalty for selecting an already empty cell
        elif self._is_edge(x, y):
            self.grid[y, x] = 0  # Remove the cell
            reward += 2  # Reward for correctly removing an edge pixel

            # Shaping term. Uses the same strict comparison as the termination
            # check: the original `>=` rewarded removal of cells that lie
            # exactly on the circle even though they count as circle cells.
            if self._outside_circle(x, y):
                reward += 3  # Removing waste outside the circle
            else:
                reward -= 3  # Cutting into the circle itself
        else:
            reward -= 4  # Interior (non-edge) cell selected; it is NOT removed

        self.steps += 1
        done = self._is_circle_formed()
        return self.grid.copy(), reward, done, False, info

    def _is_edge(self, x, y):
        """Return True if ``(x, y)`` is filled and has a 4-neighbour that is
        either outside the original square bounds or already removed."""
        if self.grid[y, x] == 0:
            return False  # Already removed, not an edge

        for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            nx, ny = x + dx, y + dy
            # The bounds test comes first, so the grid lookup below is only
            # reached for neighbours inside the square.
            if not (self.square_left <= nx < self.square_right and self.square_top <= ny < self.square_bottom):
                return True  # Neighbour lies outside the square
            if self.grid[ny, nx] == 0:
                return True  # Neighbour is removed

        return False  # Otherwise, it's an internal pixel

    def _is_circle_formed(self):
        """Return True when no filled cell of the square lies strictly outside the circle."""
        indices = np.indices(self.grid.shape)
        xs, ys = indices[1], indices[0]

        # Filled cells that are inside the original square bounds
        mask = (self.grid == 1) & (self.square_left <= xs) & (xs < self.square_right) & (self.square_top <= ys) & (ys < self.square_bottom)

        # Euclidean distance of every cell from the centre
        dists = np.sqrt((xs - self.center[0])**2 + (ys - self.center[1])**2)

        # Filled cells still sticking out beyond the circle
        remaining_pixels = mask & (dists > self.circle_radius)
        return not np.any(remaining_pixels)  # Done once nothing sticks out

    def render(self):
        """Draw the grid with pygame: green for filled cells inside or on the
        circle, white for filled cells outside it, black for empty cells."""
        self.screen.fill((0, 0, 0))
        for y in range(self.grid_size):
            for x in range(self.grid_size):
                if self.grid[y, x] == 1:
                    if self._outside_circle(x, y):
                        color = (255, 255, 255)  # White for remaining square
                    else:
                        color = (0, 255, 0)  # Green for inscribed circle
                    rect = pygame.Rect(x * self.cell_size, y * self.cell_size, self.cell_size, self.cell_size)
                    pygame.draw.rect(self.screen, color, rect)
        pygame.display.flip()
        self.clock.tick(100)  # Cap the redraw rate at 100 frames per second

    def close(self):
        """Shut pygame down (closes the window)."""
        pygame.quit()

# Create the environment
env = CutSquareEnv()

try:
    # Random-policy rollouts. No learning takes place here: actions are drawn
    # uniformly from the action space, so the totals printed below are a
    # baseline for the reward function rather than a training curve.
    for episode in range(1000):
        state, _ = env.reset()
        done = False
        total_reward = 0
        while not done:
            action = env.action_space.sample()
            next_state, reward, done, _, _ = env.step(action)
            total_reward += reward
            state = next_state
        print(f"Episode {episode+1} complete. Total reward: {total_reward}")

    print("Training complete! Now visualizing a final episode...")

    # Final visualization: render one more random episode until the circle is
    # formed or the user closes the window.
    state, _ = env.reset()
    done = env._is_circle_formed()
    while not done:
        action = env.action_space.sample()
        state, reward, done, _, _ = env.step(action)
        env.render()
        # The loop condition now honours the quit flag. Previously `done` was
        # set on QUIT but the `while` tested `_is_circle_formed()` instead, so
        # closing the window never stopped the loop.
        if any(event.type == pygame.QUIT for event in pygame.event.get()):
            done = True
finally:
    # Always release pygame, even if the cell is interrupted mid-run.
    env.close()


Episode 1 complete. Total reward: -69885
Episode 2 complete. Total reward: -93192
Episode 3 complete. Total reward: -76713
Episode 4 complete. Total reward: -82687
Episode 5 complete. Total reward: -68283
Episode 6 complete. Total reward: -75845
Episode 7 complete. Total reward: -78862
Episode 8 complete. Total reward: -81682
Episode 9 complete. Total reward: -76281
Episode 10 complete. Total reward: -88716
Episode 11 complete. Total reward: -90831
Episode 12 complete. Total reward: -82883
Episode 13 complete. Total reward: -73933
Episode 14 complete. Total reward: -51372
Episode 15 complete. Total reward: -55438
Episode 16 complete. Total reward: -73442
Episode 17 complete. Total reward: -75731
Episode 18 complete. Total reward: -64921
Episode 19 complete. Total reward: -70799
Episode 20 complete. Total reward: -69007
Episode 21 complete. Total reward: -60193
Episode 22 complete. Total reward: -63573
Episode 23 complete. Total reward: -89181
Episode 24 complete. Total reward: -69273
E

In [3]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pygame
import math
import imageio

class CutSquareEnv(gym.Env):
    """Grid environment in which a filled square is carved down to a circle,
    with every rendered frame recorded so the episode can be saved as a GIF.

    The observation is a ``grid_size x grid_size`` array of 0/1 cells. The
    cells ``[square_top:square_bottom, square_left:square_right]`` start
    filled (1). An action is an ``(x, y)`` pair (column, row); the grid itself
    is indexed ``grid[y, x]``.

    Rewards handed out by :meth:`step`:

    * ``-5``    the chosen cell is already empty (nothing changes);
    * ``-10``   the chosen cell is filled but is not an edge cell (nothing changes);
    * ``+110``  an edge cell strictly outside the circle is removed (10 + 100);
    * ``-50``   an edge cell inside or on the circle is removed (10 - 60).

    The episode terminates once no filled cell lies strictly outside the
    circle of radius ``circle_radius`` around ``center``.

    NOTE(review): ``center`` is the midpoint of the slice bounds, while the
    filled cells occupy indices ``left .. right-1``; the circle is therefore
    half a cell off the middle of the occupied cells. Left unchanged here.
    """

    def __init__(self, grid_size=50, square_top=10, square_bottom=40, square_left=10, square_right=40):
        """Store the geometry, open the pygame window and build the initial grid.

        Args:
            grid_size: Side length of the (square) grid, in cells.
            square_top, square_bottom: Row slice bounds of the filled square.
            square_left, square_right: Column slice bounds of the filled square.
        """
        super().__init__()
        self.grid_size = grid_size
        self.observation_space = spaces.Box(low=0, high=1, shape=(grid_size, grid_size), dtype=np.int32)
        self.action_space = spaces.MultiDiscrete([grid_size, grid_size])

        # Square and inscribed circle parameters
        self.square_top = square_top
        self.square_bottom = square_bottom
        self.square_left = square_left
        self.square_right = square_right
        self.center = ((square_left + square_right) / 2, (square_top + square_bottom) / 2)
        # Half the horizontal extent of the square; the vertical extent is not consulted.
        self.circle_radius = (square_right - square_left) / 2

        # Pygame rendering setup (a window is opened as soon as the env is created)
        pygame.init()
        self.cell_size = 10
        self.screen = pygame.display.set_mode((grid_size * self.cell_size, grid_size * self.cell_size))
        pygame.display.set_caption("Cut Square Environment")
        self.clock = pygame.time.Clock()

        self.frames = []  # List to store frames for GIF export
        self.reset()

    def reset(self, seed=None, options=None):
        """Restore the full square, drop recorded frames, return ``(observation, info)``.

        Args:
            seed: Optional seed, forwarded to ``gym.Env.reset`` so the
                environment's RNG is seeded as the Gymnasium API expects.
            options: Accepted for API compatibility; not used.
        """
        # Previously the seed was accepted but silently dropped.
        super().reset(seed=seed)
        self.grid = np.zeros((self.grid_size, self.grid_size), dtype=np.int32)
        self.grid[self.square_top:self.square_bottom, self.square_left:self.square_right] = 1  # Fill the square
        self.steps = 0
        self.frames = []  # Reset frames list
        return self.grid.copy(), {}

    def _outside_circle(self, x, y):
        """Return True when cell ``(x, y)`` lies strictly outside the target circle.

        This is the single definition of "outside" used for rewards and
        rendering; it matches the ``dists > circle_radius`` test in
        :meth:`_is_circle_formed`, so a cell exactly on the circle counts as
        part of the circle everywhere.
        """
        return math.hypot(x - self.center[0], y - self.center[1]) > self.circle_radius

    def step(self, action):
        """Try to remove the cell addressed by ``action`` and score the attempt.

        Args:
            action: ``(x, y)`` pair, column then row.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``truncated`` is always ``False`` and ``info`` is an empty dict.
        """
        x, y = action
        reward = 0
        info = {}

        if self.grid[y, x] == 0:
            reward = -5  # Heavy penalty for selecting an already empty cell
        elif self._is_edge(x, y):
            self.grid[y, x] = 0  # Remove the cell
            reward += 10  # Reward for correctly removing an edge pixel

            # Shaping term. Uses the same strict comparison as the termination
            # check: the original `>=` rewarded removal of cells that lie
            # exactly on the circle even though they count as circle cells.
            if self._outside_circle(x, y):
                reward += 100  # Removing waste outside the circle
            else:
                reward -= 60  # Cutting into the circle itself
        else:
            reward -= 10  # Interior (non-edge) cell selected; it is NOT removed

        self.steps += 1
        done = self._is_circle_formed()
        return self.grid.copy(), reward, done, False, info

    def _is_edge(self, x, y):
        """Return True if ``(x, y)`` is filled and has a 4-neighbour that is
        either outside the original square bounds or already removed."""
        if self.grid[y, x] == 0:
            return False  # Already removed, not an edge

        for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            nx, ny = x + dx, y + dy
            # The bounds test comes first, so the grid lookup below is only
            # reached for neighbours inside the square.
            if not (self.square_left <= nx < self.square_right and self.square_top <= ny < self.square_bottom):
                return True  # Neighbour lies outside the square
            if self.grid[ny, nx] == 0:
                return True  # Neighbour is removed

        return False

    def _is_circle_formed(self):
        """Return True when no filled cell of the square lies strictly outside the circle."""
        indices = np.indices(self.grid.shape)
        xs, ys = indices[1], indices[0]
        # Filled cells that are inside the original square bounds
        mask = (self.grid == 1) & (self.square_left <= xs) & (xs < self.square_right) & (self.square_top <= ys) & (ys < self.square_bottom)
        dists = np.sqrt((xs - self.center[0])**2 + (ys - self.center[1])**2)
        # Filled cells still sticking out beyond the circle
        remaining_pixels = mask & (dists > self.circle_radius)
        return not np.any(remaining_pixels)

    def render(self):
        """Draw the grid with pygame and append the drawn frame to ``self.frames``.

        Filled cells inside or on the circle are green, filled cells outside
        it are white, empty cells are black.
        """
        self.screen.fill((0, 0, 0))
        for y in range(self.grid_size):
            for x in range(self.grid_size):
                if self.grid[y, x] == 1:
                    color = (255, 255, 255) if self._outside_circle(x, y) else (0, 255, 0)
                    rect = pygame.Rect(x * self.cell_size, y * self.cell_size, self.cell_size, self.cell_size)
                    pygame.draw.rect(self.screen, color, rect)
        pygame.display.flip()
        self.clock.tick(100)  # Cap the redraw rate at 100 frames per second

        # Capture frame
        frame = pygame.surfarray.array3d(self.screen)
        frame = np.transpose(frame, (1, 0, 2))  # Swap the first two axes before handing the frame to imageio
        self.frames.append(frame)

    def save_gif(self, filename="cut_square_simulation.gif"):
        """Write the frames recorded by :meth:`render` to ``filename`` as a GIF.

        Raises:
            ValueError: If no frame has been recorded since the last reset.
        """
        if not self.frames:
            # Fail with a clear message instead of handing imageio an empty
            # list and then reporting a GIF that was never written.
            raise ValueError("No frames captured; call render() at least once before save_gif().")
        # NOTE(review): the unit of `duration` (seconds vs. milliseconds)
        # differs between imageio versions -- confirm against the installed one.
        imageio.mimsave(filename, self.frames, duration=0.05)
        print(f"GIF saved as {filename}")

    def close(self):
        """Shut pygame down (closes the window)."""
        pygame.quit()

# Where the recorded episode is written. NOTE(review): this is an absolute,
# machine-specific path; the target directory must already exist.
GIF_PATH = r"D:\IaaC\TERM 2\AI IN ROBOTICS\Code\Reinforced Learning_Day02\output\cut_square_simulation.gif"

# Create the environment
env = CutSquareEnv()

try:
    # Random-policy rollouts. No learning takes place here: actions are drawn
    # uniformly from the action space, so the totals printed below are a
    # baseline for the reward function rather than a training curve.
    for episode in range(1000):
        state, _ = env.reset()
        done = False
        total_reward = 0
        while not done:
            action = env.action_space.sample()
            next_state, reward, done, _, _ = env.step(action)
            total_reward += reward
            state = next_state
        print(f"Episode {episode+1} complete. Total reward: {total_reward}")

    print("Training complete! Now visualizing a final episode...")

    # Final visualization: render (and record) one more random episode until
    # the circle is formed or the user closes the window.
    state, _ = env.reset()
    done = env._is_circle_formed()
    while not done:
        action = env.action_space.sample()
        state, reward, done, _, _ = env.step(action)
        env.render()
        # The loop condition now honours the quit flag. Previously `done` was
        # set on QUIT but the `while` tested `_is_circle_formed()` instead, so
        # closing the window never stopped the loop.
        if any(event.type == pygame.QUIT for event in pygame.event.get()):
            done = True

    # Only export when something was actually rendered.
    if env.frames:
        env.save_gif(GIF_PATH)
finally:
    # Always release pygame, even if the cell is interrupted mid-run.
    env.close()


Episode 1 complete. Total reward: -96945
Episode 2 complete. Total reward: -102315
Episode 3 complete. Total reward: -71585
Episode 4 complete. Total reward: -109350
Episode 5 complete. Total reward: -140595
Episode 6 complete. Total reward: -91795
Episode 7 complete. Total reward: -92755
Episode 8 complete. Total reward: -105170
Episode 9 complete. Total reward: -97780
Episode 10 complete. Total reward: -85795
Episode 11 complete. Total reward: -78640
Episode 12 complete. Total reward: -125810
Episode 13 complete. Total reward: -88035
Episode 14 complete. Total reward: -95125
Episode 15 complete. Total reward: -93235
Episode 16 complete. Total reward: -101875
Episode 17 complete. Total reward: -74775
Episode 18 complete. Total reward: -92025
Episode 19 complete. Total reward: -104580
Episode 20 complete. Total reward: -72775
Episode 21 complete. Total reward: -122845
Episode 22 complete. Total reward: -171890
Episode 23 complete. Total reward: -95575
Episode 24 complete. Total reward: