# In [1]:
import gym
from gym import spaces
import numpy as np
import random
import math

class CursorControlEnv(gym.Env):
    """
    A minimal environment for 2D cursor control with multiple goals and obstacles.

    The agent controls the cursor ("dot") by outputting a 2D action (dx, dy),
    each component in [-1, 1], which is scaled by ``max_speed``. Goals must be
    reached in order. The episode ends when the final goal is reached or when
    the dot's movement segment touches an obstacle.

    Rewards per step:
      - ``-0.001 * distance_to_current_goal`` (shaping term),
      - ``+10`` when the current goal is reached,
      - ``-10`` (and episode end) on collision with an obstacle.

    Args:
        width: Width of the playing field; x coordinates are kept in [0, width].
        height: Height of the playing field; y coordinates are kept in [0, height].
        num_goals: Number of goals generated per episode (must be >= 1).
        enable_obstacles: If False, no obstacles are generated or checked.
        num_obstacles: Number of obstacles generated per episode when
            obstacles are enabled.

    Raises:
        ValueError: If ``num_goals`` is smaller than 1.
    """

    def __init__(self, width=1200, height=800, num_goals=3, enable_obstacles=True,
                 num_obstacles=3):
        super().__init__()

        # The observation always contains a goal, so at least one must exist.
        if num_goals < 1:
            raise ValueError(f"num_goals must be >= 1, got {num_goals}")

        # Environment parameters
        self.width = width
        self.height = height
        self.num_goals = num_goals
        self.enable_obstacles = enable_obstacles
        self.num_obstacles = num_obstacles

        # Movement settings
        self.max_speed = 3.0
        self.dot_radius = 30.0
        self.target_radius = 10.0
        # A goal counts as reached once the dot and target circles touch.
        self.goal_detection_radius = self.dot_radius + self.target_radius
        self.obstacle_radius = 20.0
        # Extra margin added to the obstacle radius in collision checks.
        self.collision_buffer = 5.0

        # Action space: 2D continuous, each in [-1, 1]
        self.action_space = spaces.Box(
            low=np.array([-1.0, -1.0], dtype=np.float32),
            high=np.array([1.0, 1.0], dtype=np.float32),
            shape=(2,),
            dtype=np.float32,
        )

        # Observation space:
        #   [dot_x, dot_y, current_target_x, current_target_y]
        # If you want to encode obstacle info or other goals, you can enlarge this.
        high = np.array(
            [self.width, self.height, self.width, self.height], dtype=np.float32
        )
        self.observation_space = spaces.Box(
            low=np.zeros_like(high, dtype=np.float32),
            high=high,
            shape=(4,),
            dtype=np.float32,
        )

        # Internal state (populated by reset())
        self.dot_pos = None
        self.targets = []
        self.current_target_idx = None
        self.obstacles = []
        self.done = False

        self.reset()

    def reset(self):
        """
        Resets the environment state:
          - Dot position = center of the window
          - Generate random goals
          - (Optionally) generate random obstacles

        Returns:
            The initial observation ``[dot_x, dot_y, goal_x, goal_y]``.
        """
        self.dot_pos = np.array([self.width / 2, self.height / 2], dtype=np.float32)

        # Generate goals (integer coordinates, bounds inclusive)
        self.targets = []
        for _ in range(self.num_goals):
            gx = random.randint(0, self.width)
            gy = random.randint(0, self.height)
            self.targets.append(np.array([gx, gy], dtype=np.float32))

        self.current_target_idx = 0

        # Generate obstacles, keeping each circle fully inside the field.
        # NOTE: positions are not checked against the start position or the
        # goals, so an obstacle may overlap either of them.
        self.obstacles = []
        if self.enable_obstacles:
            for _ in range(self.num_obstacles):
                ox = random.randint(int(self.obstacle_radius), int(self.width - self.obstacle_radius))
                oy = random.randint(int(self.obstacle_radius), int(self.height - self.obstacle_radius))
                self.obstacles.append(np.array([ox, oy], dtype=np.float32))

        self.done = False
        return self._get_obs()

    def step(self, action):
        """
        Steps the environment forward by 1 timestep using the agent's action (dx, dy in [-1,1]).
        1) Convert [-1,1] range to actual movement in [-max_speed, max_speed].
        2) Update dot position, check collision, check if goal reached.
        3) Return observation, reward, done, info.

        Args:
            action: Indexable pair ``(dx, dy)``; components outside [-1, 1]
                are clipped to that range before scaling.

        Returns:
            Tuple ``(observation, reward, done, info)``; ``info`` is always
            an empty dict.
        """
        if self.done:
            # If somehow step() is called after done, return a neutral transition
            return self._get_obs(), 0.0, True, {}

        # Enforce the declared action bounds so the dot never moves faster
        # than max_speed, then scale to [-max_speed, max_speed].
        dx = float(np.clip(float(action[0]), -1.0, 1.0)) * self.max_speed
        dy = float(np.clip(float(action[1]), -1.0, 1.0)) * self.max_speed

        # Proposed new position, clamped to environment bounds
        new_x = np.clip(self.dot_pos[0] + dx, 0, self.width)
        new_y = np.clip(self.dot_pos[1] + dy, 0, self.height)

        # Check whether the movement segment old->new touches an obstacle
        if self.enable_obstacles:
            if self._check_collision(self.dot_pos, [new_x, new_y]):
                # If collision, give negative reward and end the episode;
                # the dot stays at its previous position.
                reward = -10.0
                self.done = True
                return self._get_obs(), reward, self.done, {}

        # Apply the movement
        self.dot_pos[0] = new_x
        self.dot_pos[1] = new_y

        # Reward shaping: negative for distance
        # (this helps the agent learn that smaller distance is better)
        dist_to_goal = self._distance(self.dot_pos, self.current_goal)
        reward = -dist_to_goal * 0.001

        # Check if we reached the goal
        if dist_to_goal <= self.goal_detection_radius:
            reward += 10.0  # reward for reaching a goal
            self.current_target_idx += 1
            # The episode ends once every goal has been visited; otherwise the
            # next observation simply exposes the next goal.
            if self.current_target_idx >= self.num_goals:
                self.done = True

        return self._get_obs(), reward, self.done, {}

    def render(self, mode="human"):
        """
        (Optional) Render method if you want to visualize during training.
        Here, we just print basic info. For a Pygame-based rendering, you'd
        replicate some of your earlier drawing logic.
        """
        print(f"Dot: {self.dot_pos}, Current Goal: {self.current_goal}, Obstacles: {len(self.obstacles)}")

    def _get_obs(self):
        """
        Return the current environment observation:
          [dot_x, dot_y, current_goal_x, current_goal_y]
        """
        goal = self.current_goal
        return np.array([
            self.dot_pos[0],
            self.dot_pos[1],
            goal[0],
            goal[1]
        ], dtype=np.float32)

    @property
    def current_goal(self):
        """
        The goal the agent is currently heading for.

        Once the final goal has been reached ``current_target_idx`` equals
        ``num_goals``; the index is clamped so the last goal is still returned
        and the terminal observation / render call does not raise IndexError.
        """
        last_idx = len(self.targets) - 1
        return self.targets[min(self.current_target_idx, last_idx)]

    def _distance(self, pos1, pos2):
        """Return the Euclidean distance between two 2D points."""
        return math.hypot(pos1[0] - pos2[0], pos1[1] - pos2[1])

    def _line_circle_intersection(self, start, end, center, radius):
        """
        Checks if the segment (start->end) intersects (or comes within radius) of center.

        The closest point of the segment to ``center`` is found by projecting
        ``center`` onto the segment and clamping the projection parameter to
        [0, 1]; the segment hits the circle if that point is within ``radius``.
        """
        dx = end[0] - start[0]
        dy = end[1] - start[1]
        cx = center[0] - start[0]
        cy = center[1] - start[1]
        l2 = dx*dx + dy*dy
        if l2 == 0:
            # start and end are the same: plain point-in-circle test
            return self._distance(start, center) <= radius
        t = max(0, min(1, (cx*dx + cy*dy) / l2))
        proj_x = start[0] + t * dx
        proj_y = start[1] + t * dy
        return (self._distance((proj_x, proj_y), center) <= radius)

    def _check_collision(self, old_pos, new_pos):
        """
        Check if moving from old_pos to new_pos intersects any obstacle circle.

        Each obstacle's radius is expanded by ``collision_buffer``.
        """
        hit_radius = self.obstacle_radius + self.collision_buffer
        return any(
            self._line_circle_intersection(old_pos, new_pos, obs, hit_radius)
            for obs in self.obstacles
        )
