# Study of OpenAI Gym

In [1]:
import gym

## Environment

In [11]:
# create an environment from its registered id
# (the resulting `env` object is the one used by the following cells)
env = gym.make('PongNoFrameskip-v4')

The environment provides the following functions:

In [16]:
# seed the environment for reproducible results
# env.seed(...) is the last expression of the cell, so its return value is shown as the cell output
SEED = 10
env.seed(SEED)

[10, 1771873396]

In [20]:
# reset the environment and keep the initial observation it returns
obs = env.reset()

In [7]:
# choose an action and apply it to the environment
# (the chosen `action` is what gets passed to step(), instead of a separate hard-coded literal)
# step() returns four values, unpacked here as: next observation, reward, done flag, info
action = 1
next_obs, reward, done, info = env.step(action)

In [8]:
# render a frame of the environment
# (the value returned by render() is displayed as the cell output)
env.render()

True

In [9]:
# close the environment when you are done with it
env.close()

In [23]:
# `unwrapped` is an attribute (accessed without parentheses, it is not called like a function):
# it removes all the wrappers and returns the unwrapped environment
env = env.unwrapped

## Spaces

In [29]:
# create a FrozenLake environment whose action and observation spaces are inspected below
env = gym.make('FrozenLake-v0')

In [32]:
# Space is a generic class for action and observation spaces
# contains sample() and contains()
from gym import Space

In [36]:
# for example: both the action space and the observation space are Space instances
# (a notebook cell only displays its last expression, so the two checks are combined
# into one expression instead of silently discarding the result of the first check)
isinstance(env.action_space, Space) and isinstance(env.observation_space, Space)

True

In [37]:
# get a random sample from a space (here: the action space)
env.action_space.sample()

1

In [39]:
# check whether a value is a member of the space
# (22 is not a member of this action space, hence the False output)
env.action_space.contains(22)

False

In [44]:
# but most spaces are of a more specific type
# Box and Discrete are the most common 
from gym.spaces import Discrete, Box

In [53]:
# Discrete describes a space made of a fixed number of distinct values
# e.g. a two by two gridworld has 4 states, so its state space is Discrete(4)
two_by_two = Discrete(4)
# the observation space of FrozenLake is discrete as well
env = gym.make('FrozenLake-v0')
frozen_lake_obs_space = env.observation_space
print(frozen_lake_obs_space)
# the attribute n stores the number of discrete observations
print(frozen_lake_obs_space.n)

Discrete(16)
16


In [58]:
# Box is used for continuous spaces
# Requires low, high, shape and dtype
# An example would be the Atari observation space
env = gym.make('PongNoFrameskip-v4')
print(env.observation_space)

# how to read the printed Box(low, high, shape, dtype):
# low is 0: lowest possible rgb value
# high is 255: highest possible rgb value
# shape is 210 pixels x 160 pixels x 3 color channels (rgb)
# dtype is uint8, as colors do not have negative values

Box(0, 255, (210, 160, 3), uint8)


## Wrappers

In [70]:
# Wrappers are used to change the behaviour of the environment
# create the Breakout environment that gets wrapped in the cells below
env = gym.make('BreakoutNoFrameskip-v4')

In [71]:
# for example, if you had a wrapper named change_frame_rate, you could wrap your env like this:
#env = change_frame_rate(env)

In [81]:
# Base Class Wrapper 
# is especially suited to override functions like step and reset
# all the following wrappers inherit from the Wrapper Base Class
class MyWrapper(gym.Wrapper):
    """Pass-through wrapper: every method forwards to the wrapped env unchanged.

    Serves as a template showing which methods of the Wrapper base class
    can be overridden (step, reset, render, close, seed).
    """

    def __init__(self, env):
        """Store `env` as the wrapped environment via the base class."""
        super().__init__(env)

    def step(self, action):
        """Forward `action` to the wrapped env and return its result as-is."""
        transition = self.env.step(action)
        return transition

    def reset(self, **kwargs):
        """Reset the wrapped env, passing any keyword arguments through."""
        first_observation = self.env.reset(**kwargs)
        return first_observation

    def render(self, mode='human', **kwargs):
        """Render the wrapped env in the requested mode."""
        rendered = self.env.render(mode, **kwargs)
        return rendered

    def close(self):
        """Close the wrapped env and return whatever its close() returns."""
        outcome = self.env.close()
        return outcome

    def seed(self, seed=None):
        """Seed the wrapped env and return whatever its seed() returns."""
        used_seeds = self.env.seed(seed)
        return used_seeds

In [82]:
# Observation Wrapper
class MyObservationWrapper(gym.ObservationWrapper):
    """Template observation wrapper that leaves observations untouched."""

    # you have to override the observation function
    def observation(self, observation):
        """Return the given observation unchanged (identity transform)."""
        return observation

In [83]:
# RewardWrapper
class MyRewardWrapper(gym.RewardWrapper):
    """Template reward wrapper that leaves rewards untouched."""

    # you have to override the reward function
    def reward(self, reward):
        """Return the given reward unchanged (identity transform)."""
        return reward

In [84]:
# ActionWrapper
class MyActionWrapper(gym.ActionWrapper):
    """Template action wrapper that leaves actions untouched in both directions."""

    # you have to override the action function and the reverse_action function
    def action(self, action):
        """Return the given action unchanged (identity transform)."""
        return action
    
    def reverse_action(self, action):
        """Return the given action unchanged (identity inverse of `action`)."""
        return action

In [87]:
# stack the four example wrappers around the env; the innermost call is applied first,
# so the order is MyWrapper -> MyObservationWrapper -> MyRewardWrapper -> MyActionWrapper
env = MyActionWrapper(
    MyRewardWrapper(
        MyObservationWrapper(
            MyWrapper(env)
        )
    )
)

In [91]:
# quick check that the wrapped env still works: reset it, then take one step with action 1
# unpacking into four throwaway names requires step() to return four values and discards them
env.reset()
_, _, _, _ = env.step(1)

## Monitor

In [93]:
# Monitor is a wrapper that lets you record videos of gameplay
# force means delete older files in the directory
# NOTE(review): directory='./' is the current working directory, so recordings are written
# next to the notebook - consider a dedicated output folder; TODO confirm what force=True removes
env = gym.wrappers.Monitor(env=env, directory='./', force=True)

In [95]:
# play one episode (capped at 20000 steps), always taking action 1, and stop once it is done
# try/finally makes sure env.close() runs even if reset() or step() raises part-way through
try:
    env.reset()
    for _ in range(20000):
        _, _, done, _ = env.step(1)
        if done:
            break
finally:
    env.close()