In [2]:
# 导入相关包
import numpy as np
import pygame

import gymnasium as gym
from gymnasium import spaces

pygame 2.4.0 (SDL 2.26.4, Python 3.10.11)
Hello from the pygame community. https://www.pygame.org/contribute.html


## 1. 环境属性定义，窗口，状态空间，动作空间，Reward，状态转移

In [4]:
# 继承类以及定义元数据(属性) 与 初始化方法
class GridWorldEnv(gym.Env):
    """Square grid world in which an agent walks toward a target cell.

    Observations are a dict holding the integer (x, y) cell of the agent and of
    the target; actions are the four axis-aligned unit moves.
    """

    # Gymnasium looks up the supported modes under the plural key
    # "render_modes"; __init__ validates `render_mode` against the same key.
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self, render_mode=None, size=5):
        """Set up the spaces, the action table and the rendering state.

        Args:
            render_mode: None (no rendering) or one of metadata["render_modes"].
            size: Number of cells along each side of the square grid. Must be
                at least 2.

        Raises:
            ValueError: If `size` is smaller than 2.
            AssertionError: If `render_mode` is neither None nor a supported mode.
        """
        # reset() keeps re-sampling the target until it differs from the agent,
        # which can never succeed on a grid with fewer than two cells.
        if size < 2:
            raise ValueError(f"size must be at least 2, got {size}")

        self.size = size  # Number of cells per side of the square grid
        self.window_size = 512  # Side length of the pygame window, in pixels

        # Both spaces are required by the Env API. Each location is a pair of
        # integer coordinates in [0, size - 1].
        self.observation_space = spaces.Dict(
            {
                "agent": spaces.Box(0, size - 1, shape=(2,), dtype=int),
                "target": spaces.Box(0, size - 1, shape=(2,), dtype=int),
            }
        )

        # Four discrete actions, one per direction.
        self.action_space = spaces.Discrete(4)

        # Maps each abstract action from `self.action_space` to the offset that
        # is added to the agent's (x, y) location when the action is taken.
        # NOTE: _render_frame uses these coordinates directly as pixel
        # coordinates, and pygame's y axis grows downward from the top-left
        # corner, so +y is drawn as a move toward the bottom of the window.
        self._action_to_direction = {
            0: np.array([1, 0]),   # +x
            1: np.array([0, 1]),   # +y
            2: np.array([-1, 0]),  # -x
            3: np.array([0, -1]),  # -y
        }

        # Either no rendering at all, or one of the modes declared in metadata.
        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        # Human-mode rendering state: `self.window` is the pygame window that
        # frames are drawn to and `self.clock` paces drawing at
        # metadata["render_fps"] frames per second. Both stay None until the
        # first human-mode frame is rendered.
        self.window = None
        self.clock = None

## 2. 私有方法获取环境的观察与信息

In [5]:
    # 私有方法：把环境内部状态转换为观察值；并非强制要求，但 reset 与 step 都要用到，单独封装更方便
    '''
    Since we will need to compute observations both in reset and step,
    it is often convenient to have a (private) method _get_obs that translates the environment’s state into an observation. 
    However, this is not mandatory and you may as well compute observations in reset and step separately:
    '''
    def _get_obs(self):
        return {"agent": self._agent_location, "target": self._target_location}

    # 同样需要一个提供信息的私有方法
    '''
    We can also implement a similar method for the auxiliary information that is returned by step and reset. 
    In our case, we would like to provide the manhattan distance between the agent and the target:
    '''
    def _get_info(self):
        return{
            # 范数计算，ord 则表明计算形式，其中1表示1阶，即每一项绝对值相加
                # 具体 ord 查看官方文档
            "distance": np.linalg.norm(
                self._agent_location - self._target_location, ord=1
            )
        }

## 3. 重置环境

In [7]:
    # 重置方法，用来开始一个新的 episode
        # 通常在收到 done signal（terminated 或 truncated）时就需要重置环境
        # agent 的初始位置通过环境自带的随机数生成器 self.np_random 采样
            # 使用 self.np_random 时一般不需要自己管理随机种子，但要记住先调用 super().reset(seed=seed)

    # reset 方法应该返还一个元组
        # 包含着初始状态 与 辅助信息
        # 因此会使用到私有方法 _get_obs 与 _get_info

    def reset(self, seed=None, options=None):
        """Start a new episode with freshly sampled agent and target cells.

        Args:
            seed: Forwarded to the parent class's reset().
            options: Accepted for API compatibility; not used here.

        Returns:
            A tuple (observation, info) built by _get_obs() and _get_info().
        """
        # Let the parent class handle seeding before self.np_random is used.
        super().reset(seed=seed)

        rng = self.np_random

        # Agent cell: each coordinate drawn uniformly from [0, size).
        self._agent_location = rng.integers(0, self.size, size=2, dtype=int)

        # Target cell: re-sample until it differs from the agent's cell.
        while True:
            candidate = rng.integers(0, self.size, size=2, dtype=int)
            if not np.array_equal(candidate, self._agent_location):
                break
        self._target_location = candidate

        initial_obs = self._get_obs()
        initial_info = self._get_info()

        # In human mode the first frame is drawn right away.
        if self.render_mode == "human":
            self._render_frame()

        return initial_obs, initial_info

## 4. 环境更新步骤 以及 返还对应元组

In [8]:
    # step,表明环境接受 agent 动作后发生的变化
        # 返还一个 5 元组 (observation, reward, terminated, truncated, info)；其他环境或旧版本返还的元素个数可能不同，关键看源代码中如何定义的
        # 同样会使用 _get_obs 与  _get_info:

    def step(self, action):
        """Apply one action and report the resulting transition.

        Args:
            action: Key into self._action_to_direction selecting the move.

        Returns:
            A 5-tuple (observation, reward, terminated, truncated, info).
            `reward` is 1 when the agent lands on the target and 0 otherwise;
            `truncated` is always False.
        """
        move = self._action_to_direction[action]

        # Clipping keeps the agent on the grid: walking into a wall leaves the
        # clipped coordinate unchanged.
        highest_index = self.size - 1
        self._agent_location = np.clip(
            self._agent_location + move, 0, highest_index
        )

        # The episode terminates exactly when the agent reaches the target.
        terminated = np.array_equal(self._agent_location, self._target_location)
        if terminated:
            reward = 1
        else:
            reward = 0  # Binary sparse reward

        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        truncated = False  # This environment never truncates an episode itself
        return observation, reward, terminated, truncated, info

## 5. Rendering 渲染

In [10]:
    # Tutorial中介绍用 pygame 来实现渲染
    
    # 第一种针对  rgb_array ，仅仅是返还对应数据
    # 第二种 human,则是初始化 pygame 窗口，完成动画更新，使用了render_fps
        # 对于 human 的理解，需要学习如何搭建 pygame 窗口，以及动画更新
    def render(self):
        """Return the frame from _render_frame() in "rgb_array" mode, else None."""
        if self.render_mode != "rgb_array":
            return None
        return self._render_frame()

    def _render_frame(self):
        """Draw the current grid state onto a fresh canvas.

        In "human" mode the canvas is copied to the pygame window (created on
        first use) and the call is paced at metadata["render_fps"]; nothing is
        returned. In any other mode the canvas pixels are returned as an array
        with the first two axes swapped.
        """
        human_mode = self.render_mode == "human"
        side = self.window_size

        # Window and clock are created lazily, only for human-mode rendering.
        if human_mode and self.window is None:
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode((side, side))
        if human_mode and self.clock is None:
            self.clock = pygame.time.Clock()

        canvas = pygame.Surface((side, side))
        canvas.fill((255, 255, 255))
        cell = side / self.size  # Side length of one grid cell, in pixels

        # Target: a red square filling its cell.
        target_rect = pygame.Rect(cell * self._target_location, (cell, cell))
        pygame.draw.rect(canvas, (255, 0, 0), target_rect)

        # Agent: a blue circle centred in its cell.
        agent_center = (self._agent_location + 0.5) * cell
        pygame.draw.circle(canvas, (0, 0, 255), agent_center, cell / 3)

        # Gridlines: one horizontal and one vertical line per cell boundary.
        for k in range(self.size + 1):
            offset = cell * k
            pygame.draw.line(canvas, 0, (0, offset), (side, offset), width=3)
            pygame.draw.line(canvas, 0, (offset, 0), (offset, side), width=3)

        if not human_mode:  # rgb_array
            pixels = np.array(pygame.surfarray.pixels3d(canvas))
            return np.transpose(pixels, axes=(1, 0, 2))

        # Copy the drawing from `canvas` to the visible window.
        self.window.blit(canvas, canvas.get_rect())
        pygame.event.pump()
        pygame.display.update()

        # tick() adds the delay needed to hold the predefined framerate.
        self.clock.tick(self.metadata["render_fps"])

## 6. 关闭

In [11]:
    def close(self):
        """Shut down pygame if a window was opened, and forget the stale handles.

        Does nothing when no window was ever created.
        """
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
            # _render_frame only re-initialises pygame when these are None, so
            # clear them; otherwise a later human-mode render would try to draw
            # to a display that has already been shut down.
            self.window = None
            self.clock = None