In [41]:
# @title Setup code (not important) - Run this cell by pressing "Shift + Enter"


!pip install -qq gym==0.23.0


import matplotlib
from matplotlib import animation
from IPython.display import HTML
import matplotlib.pyplot as plt


def display_video(frames, interval=50):
    """Render a sequence of image frames as an inline HTML5 video.

    Adapted from: https://colab.research.google.com/github/deepmind/dm_control/blob/master/tutorial.ipynb

    Args:
        frames: Non-empty, indexable sequence of images that ``Axes.imshow``
            can draw. The first frame initialises the image artist.
        interval: Delay between consecutive frames, in milliseconds.

    Returns:
        An ``IPython.display.HTML`` object wrapping the encoded video.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    # Fail fast, before any figure is created, instead of hitting an opaque
    # IndexError on frames[0] with a half-built figure left open.
    if len(frames) == 0:
        raise ValueError("display_video() needs at least one frame")

    # Create the figure under the headless 'Agg' backend so it is not rendered
    # inline, and always switch back even if figure creation fails.
    orig_backend = matplotlib.get_backend()
    matplotlib.use('Agg')
    try:
        fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    finally:
        matplotlib.use(orig_backend)

    try:
        # Let the image fill the whole figure: no axes, no margins.
        ax.set_axis_off()
        ax.set_aspect('equal')
        ax.set_position([0, 0, 1, 1])
        im = ax.imshow(frames[0])

        def update(frame):
            # Swap the pixel data in place; blitting needs the changed artists back.
            im.set_data(frame)
            return [im]

        anim = animation.FuncAnimation(fig=fig, func=update, frames=frames,
                                       interval=interval, blit=True, repeat=False)
        return HTML(anim.to_html5_video())
    finally:
        # Close only the figure created here, leaving the caller's figures alone.
        plt.close(fig)



def test_env(environment, episodes=10):
    frames = []
    for episode in range(episodes):
        state = environment.reset()
        done = False
        frames.append(environment.render(mode="rgb_array"))

        while not done:
            action = environment.action_space.sample()
            next_state, reward, done, extra_info = environment.step(action)
            img = environment.render(mode="rgb_array")
            frames.append(img)
            state = next_state

    return display_video(frames)



In [None]:
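# Unused alternative to the video-based test_env defined in the setup cell,
# kept commented out for reference. It displays the current figure inline after
# every step. NOTE: `img` is never updated with new frames inside the loop.
#def test_env(env: gym.Env) -> None:
#    env.reset()
#    done = False
#    img = plt.imshow(env.render(mode = 'rgb_array'))
#    while not done:
#        _, _, done, _ = env.step(env.action_space.sample())
#        plt.axis('off')
#        display.display(plt.gcf())
#        display.clear_output(wait = True)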

In [42]:
import gym
import numpy as np
from IPython import display
import matplotlib.pyplot as plt
%matplotlib inline

In [43]:
# CartPole: keep the pole balanced upright.
# Build the environment and replay one episode of random actions as a video.
env = gym.make("CartPole-v1")
test_env(env, episodes=1)

  matplotlib.use(orig_backend)


The states of the cartpole task will be represented by a vector of four real numbers:

    Num     Observation               Min                     Max
    0       Cart Position             -4.8                    4.8
    1       Cart Velocity             -Inf                    Inf
    2       Pole Angle                -0.418 rad (-24 deg)    0.418 rad (24 deg)
    3       Pole Angular Velocity     -Inf                    Inf

In [44]:
# Observation space of the CartPole env created above; the output below shows a
# 4-element float32 Box with the lower and upper bound of each component.
env.observation_space

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)

In [45]:
# CartPole-v1 actions (the output below shows Discrete(2)):
#   0: Push the cart to the left.
#   1: Push the cart to the right.
env.action_space

Discrete(2)

In [None]:
# Acrobot: swing the bar up to a certain height.
# Build the environment and replay one episode of random actions as a video.
env = gym.make("Acrobot-v1")
test_env(env, episodes=1)

  matplotlib.use(orig_backend)


In [47]:
# Observation space of the Acrobot env created above; the output below shows a
# 6-element float32 Box with the lower and upper bound of each component.
env.observation_space

Box([ -1.        -1.        -1.        -1.       -12.566371 -28.274334], [ 1.        1.        1.        1.       12.566371 28.274334], (6,), float32)

In [None]:
# Acrobot-v1 actions (the output below shows Discrete(3)):
#   0: Apply -1 torque to the joint between the two links.
#   1: Apply 0 torque (no actuation).
#   2: Apply +1 torque to the joint between the two links.
env.action_space

Discrete(3)

In [49]:
# MountainCar: reach the goal starting from the bottom of the valley.
# Build the environment and replay one episode of random actions as a video.
env = gym.make("MountainCar-v0")
test_env(env, episodes=1)

  matplotlib.use(orig_backend)


In [50]:
# Observation space of the MountainCar env created above; the output below shows
# a 2-element float32 Box with the lower and upper bound of each component.
env.observation_space

Box([-1.2  -0.07], [0.6  0.07], (2,), float32)

In [None]:
# MountainCar-v0 actions (the output below shows Discrete(3)):
#   0: Accelerate to the left.
#   1: Don't accelerate.
#   2: Accelerate to the right.
env.action_space

Discrete(3)

In [52]:
# Pendulum: swing it up and keep it upright.
# Build the environment and replay one episode of random actions as a video.
env = gym.make("Pendulum-v1")
test_env(env, episodes=1)

  matplotlib.use(orig_backend)


In [53]:
# Observation space of the Pendulum env created above; the output below shows a
# 3-element float32 Box with the lower and upper bound of each component.
env.observation_space

Box([-1. -1. -8.], [1. 1. 8.], (3,), float32)

In [None]:
# The action is a single real number in the interval [-2, 2]: the torque applied
# to the pendulum (the output below shows a float32 Box of shape (1,)).
env.action_space

Box(-2.0, 2.0, (1,), float32)