In [15]:
import gym
from gym.wrappers import RescaleAction
import numpy as np 
from gym.spaces import Discrete
from copy import deepcopy

其他相关的信息可以查看`ReinforcementLearning/gym环境使用技巧/gym.md`

## 基本的展示

In [2]:
# Create the base environment and inspect its `action_space`.
base_env = gym.make("BipedalWalker-v3")
base_env.action_space

Box(-1.0, 1.0, (4,), float32)

In [3]:
# After wrapping with the built-in `RescaleAction`, the reported
# `action_space` reflects the new [min_action, max_action] bounds.
wrapped_env = RescaleAction(base_env, min_action=0, max_action=1)
wrapped_env.action_space

Box(0.0, 1.0, (4,), float32)

In [4]:
# Display the env's repr to see the chain of wrappers applied to it.
base_env

<TimeLimit<OrderEnforcing<BipedalWalker<BipedalWalker-v3>>>>

In [5]:
# Display the wrapped env's repr; the newly added wrapper appears outermost.
wrapped_env

<RescaleAction<TimeLimit<OrderEnforcing<BipedalWalker<BipedalWalker-v3>>>>>

## 更General的介绍

一般而言我们需要对环境进行以下改变：
1. Transform **actions** before applying them to the base environment
2. Transform **observations** that are returned by the base environment
3. Transform **rewards** that are returned by the base environment

我们可以使用`ActionWrapper`, `ObservationWrapper`, `RewardWrapper`按照需求对环境进行修改，而更复杂的case则需要使用`Wrapper`类。

- 使用`ActionWrapper`修改动作：下面的例子把连续动作空间包装成离散动作空间，`action()`负责把离散动作的编号映射回底层环境所需的连续动作


In [7]:
class DiscreteActions(gym.ActionWrapper):
    """Action wrapper that exposes a `Discrete` action space over `env`.

    A discrete action is used as an index into `disc_to_cont`; the entry
    found there is what `action()` hands back for the wrapped environment.

    Args:
        env: Environment to wrap.
        disc_to_cont: Sized, indexable collection of actions for `env`.
            Its length becomes the size of the discrete action space.
    """

    def __init__(self, env, disc_to_cont):
        super().__init__(env)
        self.disc_to_cont = disc_to_cont
        # One discrete choice per entry of the lookup table.
        n_choices = len(disc_to_cont)
        self.action_space = Discrete(n_choices)

    def action(self, act):
        """Return the entry of `disc_to_cont` selected by `act`."""
        selected = self.disc_to_cont[act]
        return selected

# Wrap the continuous LunarLander env so that it offers four discrete
# actions, one per signed unit vector of the 2-D continuous action.
env = gym.make("LunarLanderContinuous-v2")
wrapped_env = DiscreteActions(
    env,
    [np.array(move) for move in ((1, 0), (-1, 0), (0, 1), (0, -1))],
)

print(wrapped_env.action_space)  # Discrete(4)
print(env.action_space)  # the base env keeps its own Box space

Discrete(4)
Box(-1.0, 1.0, (2,), float32)


- 使用`RewardWrapper`修改奖励：下面的例子在原始奖励上叠加一个服从正态分布（均值`mu`、标准差`sigma`）的噪声

In [16]:
class LTailReward(gym.RewardWrapper):
    """Reward wrapper that adds light-tailed (Gaussian) noise to each reward.

    Args:
        env: Environment to wrap.
        mu: Mean of the additive noise.
        sigma: Standard deviation of the additive noise.
        seed: Optional seed for a private noise generator. When given, the
            noise sequence is reproducible and independent of the global
            NumPy random state. When ``None`` (default), noise is drawn from
            the global ``np.random`` state, as before.
    """

    def __init__(self, env, mu = 0., sigma = 1., seed = None):
        super().__init__(env)
        self.mu = mu
        self.sigma = sigma
        # Keep a Generator (not the `np.random` module) on the instance so the
        # wrapper stays compatible with `copy.deepcopy`.
        self._noise_rng = None if seed is None else np.random.default_rng(seed)

    def reward(self, reward):
        """Return `reward` plus one draw from Normal(`mu`, `sigma`)."""
        sampler = np.random if self._noise_rng is None else self._noise_rng
        return reward + sampler.normal(self.mu, self.sigma)

# Wrap a deep copy of the env, so the plain env and the noisy-reward env are
# separate objects that can be stepped side by side.
env = gym.make('CartPole-v1')
wrapped_env = LTailReward(deepcopy(env))

In [19]:
# Start a fresh episode in both the plain and the reward-wrapped env.
env.reset()
wrapped_env.reset()

# Feed the same random action to both envs and print the raw reward next to
# the noisy one.
for i in range(10):
    action = env.action_space.sample()
    obs1, reward_1, done1, info1 = env.step(action)
    obs2, reward_2, done2, info2 = wrapped_env.step(action)
    print(reward_1, '\t' ,reward_2)
    # Stepping an env after its episode has ended is undefined behaviour in
    # gym, so stop comparing as soon as either episode finishes.
    if done1 or done2:
        break

1.0 	 0.5736777105046559
1.0 	 2.3883410076424827
1.0 	 1.467618929155249
1.0 	 1.283649991851012
1.0 	 -0.8679148247331889
1.0 	 -0.3662533561181891
1.0 	 -0.6414553113350954
1.0 	 1.9630420930538177
1.0 	 -0.32213309387765765
1.0 	 0.6018081713107806


- 一些可以直接用的Wrapper参见：[Link](https://www.gymlibrary.dev/api/wrappers/#available-wrappers)