In [22]:

import gymnasium as gym
import numpy as np
env = gym.make('MountainCar-v0')
print('观测空间 = {}'.format(env.observation_space))
print('动作空间 = {}'.format(env.action_space))
print('观测范围 = {} ~ {}'.format(env.observation_space.low,
        env.observation_space.high))
print('动作数 = {}'.format(env.action_space.n))


观测空间 = Box([-1.2  -0.07], [0.6  0.07], (2,), float32)
动作空间 = Discrete(3)
观测范围 = [-1.2  -0.07] ~ [0.6  0.07]
动作数 = 3


In [23]:
class SimpleAgent:
    def __init__(self, env):
        pass
    
    def decide(self, observation):
        position = observation[0]     # 获取位置
        velocity = observation[1]     # 获取速度

        lb = np.minimum(-0.09 * (position + 0.25) ** 2 + 0.03,
                        0.3 * (position + 0.9) ** 4 - 0.008)
        ub = -0.07 * (position + 0.38) ** 2 + 0.07

        if lb < velocity < ub:
            return 0
        else:
            return 1
    def learn(self, *args): # 学习
        pass
    
agent = SimpleAgent(env)


In [24]:
def play(env, agent, render=False, train=False):
    episode_reward = 0. # 记录回合总奖励，初始值为0
    # env.reset() 是 (array([position, velocity]), {}) 的形式
    observation, info = env.reset() # 重置游戏环境，开始新回合
    while True: # 不断循环，直到回合结束
        if render: # 判断是否显示
            env.render() # 显示图形界面
        action = agent.decide(observation)
        next_observation, reward, terminated, truncated, info= env.step(action) # 执行动作
        episode_reward += reward # 收集回合奖励
        if train: # 判断是否训练智能体
            agent.learn(observation, action, reward,terminated,truncated ) # 学习
        if terminated or truncated: # 回合结束，跳出循环
            break
        observation = next_observation
    return episode_reward # 返回回合总奖励


In [25]:
env.reset(seed=3) # 设置随机种子，让结果可复现
episode_reward = play(env, agent, render=True)
print('回合奖励 = {}'.format(episode_reward))
env.close() # 关闭图形界面

回合奖励 = -200.0


In [26]:
episode_rewards = [play(env, agent) for _ in range(100)]
print('平均回合奖励 = {}'.format(np.mean(episode_rewards)))

平均回合奖励 = -200.0


大概流程：env=gym.make(环境名)取出环境，使用 env.reset()初始化环境，使用 env.step(动作)执行一步环境，使用 env.render()显示环境，使用 env.close()关闭环境