In [3]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces

class VacEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    
    def __init__(self):
        super(VacEnv, self).__init__()
        
        # Define action space: continuous velocity adjustments in 2D
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(2,), dtype=np.float32
        )
        
        # Observation space (noisy measurements and detected entities)
        self.observation_space = spaces.Dict({
            "vac_position": spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32),
            "vac_velocity": spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32),
            "obstacle_positions": spaces.Box(low=-np.inf, high=np.inf, shape=(1, 2), dtype=np.float32),  # 1 obstacle
            "radar_positions": spaces.Box(low=-np.inf, high=np.inf, shape=(3, 2), dtype=np.float32),  # Max 3 Radons
            "goal_position": spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32),
            "detected": spaces.Discrete(2)
        })
        
        # Environment parameters
        self.noise_std = 0.1  # Noise for observations
        self.r_vac = 5.0      # Radar radius of the vac
        self.max_speed = 5.0  # Maximum velocity
        
        # Initialize state variables in reset()
        self.pos_vac = None
        self.vel_vac = None
        self.ebs = None       # Obstacle positions
        self.pos_radons = None# Radon positions
        self.r_radons = None  # Radon radii
        self.pos_grad = None  # Goal position
        self.detected_flag = False

    def reset(self, seed=None, options=None):
        # Initialize state
        self.pos_vac = np.array([0.0, 0.0], dtype=np.float32)  # Initial position (pos_vatt)
        self.vel_vac = np.array([0.0, 0.0], dtype=np.float32)
        self.ebs = np.array([[10.0, 10.0]], dtype=np.float32)  # Single obstacle
        self.pos_radons = np.array(                             # Three Radons
            [[15.0, 15.0], [20.0, 20.0], [25.0, 25.0]], dtype=np.float32
        )
        self.r_radons = np.array([3.0, 3.0, 3.0], dtype=np.float32)
        self.pos_grad = np.array([30.0, 30.0], dtype=np.float32)  # Goal (pos_grad)
        self.detected_flag = False
        
        return self._get_obs(), {}

    def _get_obs(self):
        # Add noise to vac's position and velocity
        noisy_pos = self.pos_vac + np.random.normal(0, self.noise_std, size=2)
        noisy_vel = self.vel_vac + np.random.normal(0, self.noise_std, size=2)
        
        # Detect Radons within radar radius (r_vac)
        radar_positions = []
        for radon_pos in self.pos_radons:
            distance = np.linalg.norm(self.pos_vac - radon_pos)
            if distance <= self.r_vac:
                radar_positions.append(radon_pos)
        # Pad with zeros if fewer than 3 Radons detected
        radar_positions += [np.zeros(2)] * (3 - len(radar_positions))
        radar_positions = np.array(radar_positions, dtype=np.float32)
        
        # Check if detected by any Radon
        detected = False
        for radon_pos, radon_r in zip(self.pos_radons, self.r_radons):
            distance = np.linalg.norm(self.pos_vac - radon_pos)
            if distance <= radon_r:
                detected = True
                break
        
        return {
            "vac_position": noisy_pos,
            "vac_velocity": noisy_vel,
            "obstacle_positions": self.ebs,
            "radar_positions": radar_positions,
            "goal_position": self.pos_grad,
            "detected": int(detected)
        }

    def step(self, action):
        # Update velocity (clipped to max_speed)
        self.vel_vac = np.clip(self.vel_vac + action, -self.max_speed, self.max_speed)
        # Update position
        self.pos_vac += self.vel_vac
        
        # Check termination conditions
        done = False
        reward = -np.linalg.norm(self.pos_vac - self.pos_grad)  # Reward based on distance to goal
        
        # Collision with obstacle
        for obs in self.ebs:
            if np.linalg.norm(self.pos_vac - obs) < 1.0:  # Obstacle radius = 1.0
                reward -= 50
                done = True
        
        # Collision with Radon or detection
        for radon_pos, radon_r in zip(self.pos_radons, self.r_radons):
            if np.linalg.norm(self.pos_vac - radon_pos) < radon_r:
                reward -= 50
                done = True
        
        # Reached goal
        if np.linalg.norm(self.pos_vac - self.pos_grad) < 2.0:
            reward += 100
            done = True
        
        return self._get_obs(), reward, done, False, {}

    def render(self, mode='human'):
        pass  # Optional: Add visualization logic

    def close(self):
        pass

In [4]:
env = VacEnv()
obs, _ = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # Replace with agent's policy
    obs, reward, done, _, _ = env.step(action)
env.close()

KeyboardInterrupt: 