In [2]:
pip install pygame

Collecting pygame
  Downloading pygame-2.6.1-cp310-cp310-win_amd64.whl.metadata (13 kB)
Downloading pygame-2.6.1-cp310-cp310-win_amd64.whl (10.6 MB)
   ---------------------------------------- 0.0/10.6 MB ? eta -:--:--
   ------ --------------------------------- 1.8/10.6 MB 12.6 MB/s eta 0:00:01
   -------------------- ------------------- 5.5/10.6 MB 16.0 MB/s eta 0:00:01
   ------------------------------------- -- 10.0/10.6 MB 17.2 MB/s eta 0:00:01
   ---------------------------------------- 10.6/10.6 MB 16.6 MB/s eta 0:00:00
Installing collected packages: pygame
Successfully installed pygame-2.6.1
Note: you may need to restart the kernel to use updated packages.


In [10]:
# filepath: d:\IaaC\TERM 2\AI IN ROBOTICS\Code\Reinforced Learning_Day02\Wood Carving.ipynb
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pygame
import math

class CutSquareEnv(gym.Env):
    """Grid environment in which an agent removes cells from a filled square.

    The grid is ``grid_size`` x ``grid_size``. A cell holding 1 is present and
    a cell holding 0 is removed/empty. ``reset`` fills the region
    ``[square_top, square_bottom) x [square_left, square_right)`` with 1s.
    An action is an ``(x, y)`` cell coordinate; a present cell is removed only
    when it lies on the current edge of the remaining material.

    Args:
        grid_size: Side length of the grid, in cells.
        square_top: First row of the filled square (inclusive).
        square_bottom: Row bound of the filled square (exclusive).
        square_left: First column of the filled square (inclusive).
        square_right: Column bound of the filled square (exclusive).
        max_steps: Number of steps after which an episode is ended.
    """

    def __init__(self, grid_size=50, square_top=10, square_bottom=40, square_left=10, square_right=40, max_steps=500):
        super().__init__()
        self.grid_size = grid_size
        self.max_steps = max_steps
        # Grid: 1 = cell present, 0 = removed
        self.observation_space = spaces.Box(low=0, high=1, shape=(grid_size, grid_size), dtype=np.int32)
        # Action: choose a cell coordinate (x, y) to remove
        self.action_space = spaces.MultiDiscrete([grid_size, grid_size])

        # Parameters for the square and ideal circle
        self.square_top = square_top
        self.square_bottom = square_bottom
        self.square_left = square_left
        self.square_right = square_right
        # NOTE(review): cells are addressed by integer index, so with the
        # defaults the filled columns are 10..39 (midpoint 24.5) while this
        # center is 25.0. Distances are therefore measured from a point half a
        # cell off the middle of the square - confirm whether that is intended.
        self.center = ((square_left + square_right) / 2, (square_top + square_bottom) / 2)
        # Define circle radius as half the side length of the square
        self.circle_radius = (square_right - square_left) / 2

        # pygame display parameters (the display surface is created here, at construction)
        pygame.init()
        self.cell_size = 10  # each grid cell drawn as a 10x10 block
        self.screen = pygame.display.set_mode((grid_size * self.cell_size, grid_size * self.cell_size))
        pygame.display.set_caption("Cut Square Environment")
        self.clock = pygame.time.Clock()

        self.reset()

    def reset(self, seed=None, options=None):
        """Restore the full square and zero the step counter.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``.
            options: Accepted for API compatibility; not used.

        Returns:
            ``(observation, info)`` where ``observation`` is a copy of the grid
            and ``info`` is an empty dict.
        """
        # Forward the seed to the base class so it is no longer silently ignored.
        super().reset(seed=seed)

        # Create an empty grid and fill the square region with 1's.
        self.grid = np.zeros((self.grid_size, self.grid_size), dtype=np.int32)
        self.grid[self.square_top:self.square_bottom, self.square_left:self.square_right] = 1
        self.steps = 0
        return self.grid.copy(), {}

    def step(self, action):
        """Attempt to remove the cell at ``action = (x, y)``.

        Rewards:
            * -5 if the cell is already empty.
            * -4 if the cell is present but not on the edge (it is not removed).
            * +5 (2 + 3) if an edge cell at distance >= ``circle_radius`` from
              ``center`` is removed.
            * -1 (2 - 3) if an edge cell closer than ``circle_radius`` is removed.

        Args:
            action: Pair ``(x, y)`` of column and row indices.

        Returns:
            ``(observation, reward, done, False, info)``. ``done`` is True when
            the grid is empty or ``max_steps`` steps have been taken. The fourth
            element stays False and the step limit is reported through ``done``,
            because the loops in this notebook read only the third element.

        Raises:
            IndexError: If ``x`` or ``y`` is outside ``[0, grid_size)``.
        """
        x, y = action
        # Reject out-of-range coordinates explicitly; a negative index would
        # otherwise wrap around and address a cell on the opposite side.
        if not (0 <= x < self.grid_size and 0 <= y < self.grid_size):
            raise IndexError(f"action {(x, y)} is outside the {self.grid_size}x{self.grid_size} grid")

        info = {}

        if self.grid[y, x] == 0:
            reward = -5  # Larger penalty for removing an already-removed cell
        elif not self._is_edge(x, y):
            reward = -4  # Heavy penalty if removing an internal pixel
        else:
            # Only edge cells are actually removed.
            self.grid[y, x] = 0
            reward = 2  # Reward for correctly removing an edge pixel

            # Distance check: Removing outermost pixels is better
            dist = math.hypot(x - self.center[0], y - self.center[1])
            if dist >= self.circle_radius:
                reward += 3  # Extra reward for shaping towards the ideal circle
            else:
                reward -= 3  # Penalty if it removes inside the circle too early

        self.steps += 1
        done = bool(np.sum(self.grid) == 0) or (self.steps >= self.max_steps)
        return self.grid.copy(), reward, done, False, info

    def _is_edge(self, x, y):
        """Return True if cell ``(x, y)`` is present and has a 4-neighbour that
        is outside the original square or has already been removed."""
        if self.grid[y, x] == 0:
            return False  # Already removed, not an edge

        # Edge condition: At least one neighbor is outside the square OR removed
        for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            nx, ny = x + dx, y + dy
            if not (self.square_left <= nx < self.square_right and self.square_top <= ny < self.square_bottom):
                return True  # Near the square boundary
            if self.grid[ny, nx] == 0:
                return True  # Neighbor is removed

        return False  # Otherwise, it's an internal pixel

    def render(self):
        """Draw the grid to the pygame window: present cells white, removed cells black."""
        self.screen.fill((0, 0, 0))
        for y in range(self.grid_size):
            for x in range(self.grid_size):
                if self.grid[y, x] == 1:
                    rect = pygame.Rect(x * self.cell_size, y * self.cell_size, self.cell_size, self.cell_size)
                    pygame.draw.rect(self.screen, (255, 255, 255), rect)
        pygame.display.flip()
        self.clock.tick(100)  # cap the redraw rate at 100 frames per second

    def _no_more_removals(self):
        """Return True when no present cell of the square lies farther than
        ``circle_radius`` from ``center``.

        This helper is not called by ``step``; termination there is based on
        the grid being empty or the step limit being reached.
        """
        indices = np.indices(self.grid.shape)
        xs, ys = indices[1], indices[0]

        # Consider only the original square region
        mask = (self.grid == 1) & (self.square_left <= xs) & (xs < self.square_right) & (self.square_top <= ys) & (ys < self.square_bottom)

        # Compute Euclidean distance from center
        dists = np.sqrt((xs - self.center[0])**2 + (ys - self.center[1])**2)

        # Stop only when all remaining square pixels are inside the circle
        return not np.any(mask & (dists > self.circle_radius))

    def close(self):
        """Shut pygame down (closes the display created in ``__init__``)."""
        pygame.quit()

# Create the environment
env = CutSquareEnv()

# try/finally guarantees the pygame window is closed even if a rollout raises
# or the kernel is interrupted.
try:
    # Rollout loop (without visualization). Actions are sampled uniformly at
    # random from the action space; no policy is being learned here.
    for episode in range(500):
        state, _ = env.reset()
        done = False
        total_reward = 0
        while not done:
            # Pick a random cell to remove
            action = env.action_space.sample()
            next_state, reward, terminated, truncated, _ = env.step(action)
            # End the episode on either signal instead of discarding `truncated`.
            done = terminated or truncated
            total_reward += reward
            state = next_state
        print(f"Episode {episode+1} complete. Total reward: {total_reward}")

    print("Training complete! Now visualizing a final episode...")

    # Final episode, rendered step by step
    state, _ = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        env.render()
        # Drain the event queue so the window stays responsive; closing the
        # window ends the episode early.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                done = True
finally:
    env.close()

Episode 1 complete. Total reward: -2166
Episode 2 complete. Total reward: -2151
Episode 3 complete. Total reward: -2146
Episode 4 complete. Total reward: -2112
Episode 5 complete. Total reward: -2191
Episode 6 complete. Total reward: -2212
Episode 7 complete. Total reward: -2116
Episode 8 complete. Total reward: -2124
Episode 9 complete. Total reward: -2106
Episode 10 complete. Total reward: -2176
Episode 11 complete. Total reward: -2180
Episode 12 complete. Total reward: -2182
Episode 13 complete. Total reward: -2145
Episode 14 complete. Total reward: -2142
Episode 15 complete. Total reward: -2048
Episode 16 complete. Total reward: -2175
Episode 17 complete. Total reward: -2118
Episode 18 complete. Total reward: -2153
Episode 19 complete. Total reward: -2218
Episode 20 complete. Total reward: -2206
Episode 21 complete. Total reward: -2210
Episode 22 complete. Total reward: -2137
Episode 23 complete. Total reward: -2075
Episode 24 complete. Total reward: -2122
Episode 25 complete. Tota

In [17]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pygame
import math

class CutSquareEnv(gym.Env):
    """Grid environment in which an agent carves a filled square down to a circle.

    The grid is ``grid_size`` x ``grid_size``. A cell holding 1 is present and
    a cell holding 0 is removed/empty. ``reset`` fills the region
    ``[square_top, square_bottom) x [square_left, square_right)`` with 1s.
    An action is an ``(x, y)`` cell coordinate; a present cell is removed only
    when it lies on the current edge of the remaining material. An episode ends
    once every remaining cell lies within ``circle_radius`` of ``center`` or
    the step limit is reached.

    Args:
        grid_size: Side length of the grid, in cells.
        square_top: First row of the filled square (inclusive).
        square_bottom: Row bound of the filled square (exclusive).
        square_left: First column of the filled square (inclusive).
        square_right: Column bound of the filled square (exclusive).
        max_steps: Number of steps after which an episode is ended.
    """

    def __init__(self, grid_size=50, square_top=10, square_bottom=40, square_left=10, square_right=40, max_steps=1000):
        super().__init__()
        self.grid_size = grid_size
        self.max_steps = max_steps
        # Observation: the whole grid (1 = cell present, 0 = removed)
        self.observation_space = spaces.Box(low=0, high=1, shape=(grid_size, grid_size), dtype=np.int32)
        # Action: an (x, y) cell coordinate to remove
        self.action_space = spaces.MultiDiscrete([grid_size, grid_size])

        # Square and target circle parameters
        self.square_top = square_top
        self.square_bottom = square_bottom
        self.square_left = square_left
        self.square_right = square_right
        # NOTE(review): cells are addressed by integer index, so with the
        # defaults the filled columns are 10..39 (midpoint 24.5) while this
        # center is 25.0. Distances are therefore measured from a point half a
        # cell off the middle of the square - confirm whether that is intended.
        self.center = ((square_left + square_right) / 2, (square_top + square_bottom) / 2)
        self.circle_radius = (square_right - square_left) / 4  # Quarter the side length of the square

        # Pygame rendering setup (the display surface is created here, at construction)
        pygame.init()
        self.cell_size = 10  # each grid cell is drawn as a 10x10 pixel block
        self.screen = pygame.display.set_mode((grid_size * self.cell_size, grid_size * self.cell_size))
        pygame.display.set_caption("Cut Square Environment")
        self.clock = pygame.time.Clock()

        self.reset()

    def reset(self, seed=None, options=None):
        """Restore the full square and zero the step counter.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``.
            options: Accepted for API compatibility; not used.

        Returns:
            ``(observation, info)`` where ``observation`` is a copy of the grid
            and ``info`` is an empty dict.
        """
        # Forward the seed to the base class so it is no longer silently ignored.
        super().reset(seed=seed)

        self.grid = np.zeros((self.grid_size, self.grid_size), dtype=np.int32)
        self.grid[self.square_top:self.square_bottom, self.square_left:self.square_right] = 1  # Fill the square
        self.steps = 0
        return self.grid.copy(), {}

    def step(self, action):
        """Attempt to remove the cell at ``action = (x, y)``.

        Rewards:
            * -5 if the cell is already empty.
            * -4 if the cell is present but not on the edge (it is not removed).
            * +5 (2 + 3) if an edge cell at distance >= ``circle_radius`` from
              ``center`` is removed.
            * -1 (2 - 3) if an edge cell closer than ``circle_radius`` is
              removed (the cell is still removed).

        Args:
            action: Pair ``(x, y)`` of column and row indices.

        Returns:
            ``(observation, reward, done, False, info)``. ``done`` is True when
            ``_is_circle_formed()`` holds or ``max_steps`` steps have been
            taken. The fourth element stays False and the step limit is
            reported through ``done``, because the loops in this notebook read
            only the third element.

        Raises:
            IndexError: If ``x`` or ``y`` is outside ``[0, grid_size)``.
        """
        x, y = action
        # Reject out-of-range coordinates explicitly; a negative index would
        # otherwise wrap around and address a cell on the opposite side.
        if not (0 <= x < self.grid_size and 0 <= y < self.grid_size):
            raise IndexError(f"action {(x, y)} is outside the {self.grid_size}x{self.grid_size} grid")

        info = {}

        if self.grid[y, x] == 0:
            reward = -5  # Heavy penalty for selecting an already removed cell
        elif not self._is_edge(x, y):
            reward = -4  # Heavy penalty if removing an internal pixel
        else:
            # Only edge cells are actually removed.
            self.grid[y, x] = 0
            reward = 2  # Reward for correctly removing an edge pixel

            # Distance-based shaping incentive
            dist = math.hypot(x - self.center[0], y - self.center[1])
            if dist >= self.circle_radius:
                reward += 3  # Reward for shaping towards a circle
            else:
                reward -= 3  # Penalty for removing pixels inside the ideal circle too soon

        self.steps += 1
        done = self._is_circle_formed() or (self.steps >= self.max_steps)
        return self.grid.copy(), reward, done, False, info

    def _is_edge(self, x, y):
        """A pixel is an edge if it's part of the square and at least one of its neighbors is removed or outside the square."""
        if self.grid[y, x] == 0:
            return False  # Already removed, not an edge

        for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            nx, ny = x + dx, y + dy
            if not (self.square_left <= nx < self.square_right and self.square_top <= ny < self.square_bottom):
                return True  # Near the square boundary
            if self.grid[ny, nx] == 0:
                return True  # Neighbor is removed

        return False  # Otherwise, it's an internal pixel

    def _is_circle_formed(self):
        """Return True when no remaining cell of the square lies outside the circle.

        The check passes once every present cell is within ``circle_radius`` of
        ``center``. It also passes when no cells remain at all.
        """
        indices = np.indices(self.grid.shape)
        xs, ys = indices[1], indices[0]

        # Remaining filled pixels inside the original square
        mask = (self.grid == 1) & (self.square_left <= xs) & (xs < self.square_right) & (self.square_top <= ys) & (ys < self.square_bottom)

        # Euclidean distance of every grid cell from the center
        dists = np.sqrt((xs - self.center[0])**2 + (ys - self.center[1])**2)

        # Look for a remaining cell OUTSIDE the circle. Requiring
        # `mask & (dists <= r)` to hold for every grid cell can never succeed,
        # because cells outside the square are always False in `mask`.
        return not np.any(mask & (dists > self.circle_radius))

    def render(self):
        """Render the grid with Pygame.

        Present cells within ``circle_radius`` of ``center`` are drawn green,
        other present cells white, and removed cells stay black.
        """
        self.screen.fill((0, 0, 0))
        for y in range(self.grid_size):
            for x in range(self.grid_size):
                if self.grid[y, x] == 1:
                    # Check if it's part of the target circle
                    dist = math.hypot(x - self.center[0], y - self.center[1])
                    if dist <= self.circle_radius:
                        color = (0, 255, 0)  # Green for cells inside the circle
                    else:
                        color = (255, 255, 255)  # White for remaining square
                    rect = pygame.Rect(x * self.cell_size, y * self.cell_size, self.cell_size, self.cell_size)
                    pygame.draw.rect(self.screen, color, rect)
        pygame.display.flip()
        self.clock.tick(1000)  # cap the redraw rate at 1000 frames per second

    def close(self):
        """Shut pygame down (closes the display created in ``__init__``)."""
        pygame.quit()

# Create the environment
env = CutSquareEnv()

# try/finally guarantees the pygame window is closed even if a rollout raises
# or the kernel is interrupted.
try:
    # Rollout loop (without visualization). Actions are sampled uniformly at
    # random from the action space; no policy is being learned here.
    for episode in range(500):
        state, _ = env.reset()
        done = False
        total_reward = 0
        while not done:
            action = env.action_space.sample()
            next_state, reward, terminated, truncated, _ = env.step(action)
            # End the episode on either signal instead of discarding `truncated`.
            done = terminated or truncated
            total_reward += reward
            state = next_state
        print(f"Episode {episode+1} complete. Total reward: {total_reward}")

    print("Training complete! Now visualizing a final episode...")

    # Final visualization
    state, _ = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        env.render()
        # Drain the event queue so the window stays responsive; closing the
        # window ends the episode early.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                done = True
finally:
    env.close()


Episode 1 complete. Total reward: -4241
Episode 2 complete. Total reward: -4285
Episode 3 complete. Total reward: -4199
Episode 4 complete. Total reward: -4293
Episode 5 complete. Total reward: -4184
Episode 6 complete. Total reward: -4334
Episode 7 complete. Total reward: -4337
Episode 8 complete. Total reward: -4173
Episode 9 complete. Total reward: -4216
Episode 10 complete. Total reward: -4234
Episode 11 complete. Total reward: -4224
Episode 12 complete. Total reward: -4245
Episode 13 complete. Total reward: -4267
Episode 14 complete. Total reward: -4087
Episode 15 complete. Total reward: -4288
Episode 16 complete. Total reward: -4145
Episode 17 complete. Total reward: -4290
Episode 18 complete. Total reward: -4089
Episode 19 complete. Total reward: -4126
Episode 20 complete. Total reward: -4091
Episode 21 complete. Total reward: -4217
Episode 22 complete. Total reward: -4169
Episode 23 complete. Total reward: -4171
Episode 24 complete. Total reward: -4180
Episode 25 complete. Tota