In [6]:
# Modify the import statement in maze library if possible
from collections.abc import ValuesView

In [None]:
import gym
import numpy as np
import matplotlib.pyplot as plt
from gym import spaces
import random

class EndoscopyEnv(gym.Env):
    """Toy endoscope-navigation environment on a square grid maze.

    State / observation is a float32 vector ``[row, col, heading_deg, sensor]``:

    * ``row`` / ``col`` -- continuous position in grid-cell units; integer
      values are cell indices into ``self.maze``.
    * ``heading_deg`` -- orientation, always kept in ``[0, 180)`` by ``step``.
    * ``sensor`` -- normalised closeness to the goal in ``[0, 1]``.

    Actions (``Discrete(182)``): ``0..179`` add that many degrees to the
    heading, ``180`` moves one unit forward, ``181`` one unit backward.

    The classic Gym API is used: ``reset()`` returns the observation and
    ``step()`` returns ``(obs, reward, done, info)``.

    NOTE(review): the maze is generated and rendered, but movement is only
    clamped to the grid; walls do not block motion. Confirm whether wall
    collisions are wanted before training on this environment.
    """

    def __init__(self, grid_size=10, max_steps=200):
        """Build the spaces, the maze and the initial state.

        Args:
            grid_size: Side length of the square maze in cells (must be >= 3
                so that the start cell (1, 1) lies inside the border).
            max_steps: Episode length limit; reaching it ends the episode.

        Raises:
            ValueError: If ``grid_size`` is smaller than 3.
        """
        super().__init__()

        if grid_size < 3:
            raise ValueError("grid_size must be at least 3")

        # 0-179: rotate by that many degrees, 180: forward, 181: backward
        self.action_space = spaces.Discrete(182)

        # Observation: row, col, orientation angle, goal-proximity sensor
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0], dtype=np.float32),
            high=np.array([100, 100, 180, 1], dtype=np.float32),
            dtype=np.float32,
        )

        self.max_steps = max_steps
        self.current_step = 0
        self.grid_size = grid_size

        # The goal is the cell the maze generator guarantees to be reachable.
        self.goal = np.array([grid_size - 2, grid_size - 2])
        self.state = np.array([1, 1, 90, 0], dtype=np.float32)
        self.maze = self._create_maze()

    def _create_maze(self):
        """Generate a random maze with a depth-first backtracker.

        Returns:
            A ``(grid_size, grid_size)`` int array where 1 is wall and 0 is
            free. The outer border is always wall; (1, 1) and
            ``(grid_size - 2, grid_size - 2)`` are free and connected.
        """
        size = self.grid_size
        maze = np.ones((size, size), dtype=int)
        start = (1, 1)
        end = (size - 2, size - 2)
        maze[start] = 0

        # Move two cells at a time so that the cell in between is a real wall
        # that gets knocked down; stepping by one would open every interior
        # cell and leave no walls at all.
        jumps = [(0, 2), (2, 0), (0, -2), (-2, 0)]
        stack = [start]

        while stack:
            row, col = stack[-1]
            unvisited = [
                (row + d_row, col + d_col)
                for d_row, d_col in jumps
                if 1 <= row + d_row < size - 1
                and 1 <= col + d_col < size - 1
                and maze[row + d_row, col + d_col] == 1
            ]

            if not unvisited:
                stack.pop()
                continue

            nxt = random.choice(unvisited)
            maze[(row + nxt[0]) // 2, (col + nxt[1]) // 2] = 0  # wall between
            maze[nxt] = 0
            stack.append(nxt)

        if maze[end] == 1:
            # With an even grid_size the end cell is off the odd-index lattice
            # the search visits. Open it and the cell above it, which borders
            # the last lattice cell (size - 3, size - 3).
            maze[end] = 0
            maze[end[0] - 1, end[1]] = 0

        return maze

    def reset(self):
        """Start a new episode with a freshly generated maze.

        Returns:
            A copy of the initial observation ``[1, 1, 90, 0]``.
        """
        self.state = np.array([1, 1, 90, 0], dtype=np.float32)
        self.current_step = 0
        self.maze = self._create_maze()
        return self.state.copy()

    def step(self, action):
        """Apply one action and advance the episode.

        Args:
            action: Integer in ``[0, 181]`` (see the class docstring).

        Returns:
            ``(observation, reward, done, info)``. The reward is -1 per step,
            +100 when the agent is within distance 1 of the goal, and -100
            when ``max_steps`` is reached without getting there.
        """
        self.current_step += 1

        if action in (180, 181):
            direction = 1.0 if action == 180 else -1.0
            heading = np.radians(self.state[2])
            self.state[0] += direction * np.sin(heading)
            self.state[1] += direction * np.cos(heading)
            # Keep the position on the grid so it can never drop below the
            # lower bound declared in observation_space.
            self.state[:2] = np.clip(self.state[:2], 0.0, self.grid_size - 1)
        else:  # rotate
            self.state[2] = (self.state[2] + action) % 180

        # Update sensor reading (simplified for this example)
        self.state[3] = self._get_sensor_reading()

        reached_goal = np.linalg.norm(self.state[:2] - self.goal) < 1
        out_of_time = self.current_step >= self.max_steps
        done = bool(reached_goal or out_of_time)

        if reached_goal:
            reward = 100.0  # Reward reaching the goal
        elif out_of_time:
            reward = -100.0  # Penalize not reaching the goal in time
        else:
            reward = -1.0  # Penalize each step to encourage efficiency

        info = {}
        # Return a copy: self.state is mutated in place on every step, so
        # handing out the array itself would rewrite observations the caller
        # has already stored.
        return self.state.copy(), reward, done, info

    def _get_sensor_reading(self):
        """Return closeness to the goal, clipped to ``[0, 1]``.

        The value is ``1 - distance / |goal|``: 1 at the goal, 0 at the
        origin's distance or farther.
        """
        distance = np.linalg.norm(self.state[:2] - self.goal)
        reference = np.linalg.norm(self.goal)
        return float(np.clip(1.0 - distance / reference, 0.0, 1.0))

    def render(self, mode='human'):
        """Draw the maze, the endoscope (blue) and the goal (red).

        Args:
            mode: Accepted for Gym API compatibility; not inspected.
        """
        scale = 100 / self.grid_size  # plot units per grid cell
        fig = plt.figure(figsize=(10, 10))
        # extent is (left, right, bottom, top); putting 0 at the top makes the
        # y axis grow downwards like the row index, so markers and image agree.
        plt.imshow(self.maze, cmap='Greys', origin='upper', extent=(0, 100, 100, 0))
        # +0.5 places a marker with integer coordinates at its cell's centre.
        plt.plot((self.state[1] + 0.5) * scale, (self.state[0] + 0.5) * scale,
                 'bo', markersize=10)  # Endoscope position
        plt.plot((self.goal[1] + 0.5) * scale, (self.goal[0] + 0.5) * scale,
                 'ro', markersize=10)  # Goal position
        plt.xlabel('X-axis')
        plt.ylabel('Y-axis')
        plt.title('Endoscopy Robot Environment')
        plt.grid()
        plt.show()
        # Release the figure; render is typically called once per step.
        plt.close(fig)

# Smoke test: play a single episode with uniformly random actions,
# drawing and logging the environment after every step.
env = EndoscopyEnv()
obs = env.reset()

while True:
    action = env.action_space.sample()  # random policy
    obs, reward, done, info = env.step(action)
    env.render()
    print(f"State: {obs}, Reward: {reward}, Done: {done}")
    if done:
        break
