Instalacja:
* `pip install gym`

Opcjonalnie:
* `pip install "gym[atari]"`
* itd.

In [1]:
import time

import gym
from gym import spaces

Krótka dokumentacja:

https://gym.openai.com/docs/

Environments:

https://gym.openai.com/envs/

Dwa rodzaje "przestrzeni":
* `Discrete` - skończona liczba stanów,
* `Box` - każda współrzędna jest przedziałem (potencjalnie nieograniczonym) na prostej rzeczywistej.

In [2]:
def describe_environment(env):
    """Print a summary of an environment's observation and action spaces.

    For each of ``env.observation_space`` and ``env.action_space`` the space's
    own representation is printed first. ``spaces.Box`` instances are followed
    by their shape, lower/upper bounds and one random sample;
    ``spaces.Discrete`` instances by their shape, ``n`` and one random sample.
    Any other kind of space gets only the representation line.

    Args:
        env: Object exposing ``observation_space`` and ``action_space``.
    """
    def _report(space):
        # The representation line is printed for every kind of space.
        print(space)
        if isinstance(space, spaces.Box):
            fields = (
                ("shape:", "shape"),
                ("lower bounds:", "low"),
                ("upper bounds:", "high"),
            )
        elif isinstance(space, spaces.Discrete):
            fields = (
                ("shape:", "shape"),
                ("n:", "n"),
            )
        else:
            # Unrecognised space type: nothing more to show.
            return
        # Attributes are looked up one at a time, right before being printed.
        for label, attribute in fields:
            print(label, getattr(space, attribute))
        print("sample:", space.sample())

    print("Observation space:")
    _report(env.observation_space)
    # Blank separator line between the two sections.
    print()
    print("Action space:")
    _report(env.action_space)

def test_environment(env, n_episodes=3, n_steps=200, fps=1000, break_if_done=True):
    for _ in range(n_episodes):
        observation = env.reset()
        for _ in range(n_steps):
            env.render()
            time.sleep(1./float(fps))
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done and break_if_done:
                break
    env.render(close=True)

## Classic control

In [3]:
# Create the CartPole-v0 environment, print its observation/action spaces,
# then run the random-action rollout with the defaults of test_environment
# (3 episodes, at most 200 steps each, stopping an episode on done).
env = gym.make('CartPole-v0')
describe_environment(env)
test_environment(env)

Observation space:
Box(4,)
shape: (4,)
lower bounds: [ -4.80000000e+00  -3.40282347e+38  -4.18879020e-01  -3.40282347e+38]
upper bounds: [  4.80000000e+00   3.40282347e+38   4.18879020e-01   3.40282347e+38]
sample: [  4.68609638e-01   1.46450285e+38   8.60908446e-02   3.05459097e+37]

Action space:
Discrete(2)
shape: ()
n: 2
sample: 1


In [4]:
# Reuse the `env` created in the previous cell for a single 250-step episode
# that does NOT stop when step() reports done, so step() keeps being called
# after the episode has ended (see the warning printed as this cell's output).
test_environment(
    env=env,
    n_episodes=1,
    n_steps=250,
    break_if_done=False)

You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.


In [5]:
# Rebind `env` to MountainCar-v0, print its spaces, and run the default
# random-action rollout.
env = gym.make('MountainCar-v0')
describe_environment(env)
test_environment(env)

Observation space:
Box(2,)
shape: (2,)
lower bounds: [-1.2  -0.07]
upper bounds: [ 0.6   0.07]
sample: [-1.16666077  0.04111768]

Action space:
Discrete(3)
shape: ()
n: 3
sample: 1


In [6]:
# Rebind `env` to Acrobot-v1, print its spaces, and run the default
# random-action rollout.
env = gym.make('Acrobot-v1')
describe_environment(env)
test_environment(env)

Observation space:
Box(6,)
shape: (6,)
lower bounds: [ -1.          -1.          -1.          -1.         -12.56637061
 -28.27433388]
upper bounds: [  1.           1.           1.           1.          12.56637061
  28.27433388]
sample: [  5.44152229e-03   8.44695964e-01   8.27615875e-02   8.46612136e-01
   8.29122520e+00   2.64809726e+01]

Action space:
Discrete(3)
shape: ()
n: 3
sample: 1


In [7]:
# Rebind `env` to Pendulum-v0, print its spaces, and run the default
# random-action rollout. Per the output below, the action space here is a
# Box rather than a Discrete space.
env = gym.make('Pendulum-v0')
describe_environment(env)
test_environment(env)

Observation space:
Box(3,)
shape: (3,)
lower bounds: [-1. -1. -8.]
upper bounds: [ 1.  1.  8.]
sample: [ 0.42398115  0.1817151   3.20071202]

Action space:
Box(1,)
shape: (1,)
lower bounds: [-2.]
upper bounds: [ 2.]
sample: [-0.55626211]


## Atari

In [10]:
# Rebind `env` to AirRaid-ram-v0, print its spaces, then run one episode of
# at most 250 random steps, throttled to 25 frames per second.
env = gym.make('AirRaid-ram-v0')
describe_environment(env)
test_environment(env, n_episodes=1, n_steps=250, fps=25)

Observation space:
Box(128,)
shape: (128,)
lower bounds: [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.]
upper bounds: [ 255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255. 

In [11]:
# Rebind `env` to Asterix-ram-v0, print its spaces, then run one episode of
# at most 250 random steps, throttled to 25 frames per second.
env = gym.make('Asterix-ram-v0')
describe_environment(env)
test_environment(env, n_episodes=1, n_steps=250, fps=25)

Observation space:
Box(128,)
shape: (128,)
lower bounds: [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.]
upper bounds: [ 255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255. 

In [12]:
# Rebind `env` to Breakout-ram-v0, print its spaces, then run one episode of
# at most 250 random steps, throttled to 25 frames per second.
env = gym.make('Breakout-ram-v0')
describe_environment(env)
test_environment(env, n_episodes=1, n_steps=250, fps=25)

Observation space:
Box(128,)
shape: (128,)
lower bounds: [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.]
upper bounds: [ 255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255. 

In [13]:
# Rebind `env` to MsPacman-ram-v0, print its spaces, then run one episode of
# at most 250 random steps, throttled to 25 frames per second.
env = gym.make('MsPacman-ram-v0')
describe_environment(env)
test_environment(env, n_episodes=1, n_steps=250, fps=25)

Observation space:
Box(128,)
shape: (128,)
lower bounds: [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.]
upper bounds: [ 255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.  255.
  255.  255.  255.  255.  255.  255.  255. 