In [2]:
import gym
import time

In [3]:
env = gym.make('MountainCar-v0', render_mode='rgb_array')

In [4]:
# Observation and action space
obs_space = env.observation_space
action_space = env.action_space
print(f"The observation space: {obs_space}")
print(f"The action space: {action_space}")

The observation space: Box([-1.2  -0.07], [0.6  0.07], (2,), float32)
The action space: Discrete(3)


In [6]:
import warnings
warnings.filterwarnings("ignore")


In [7]:
import matplotlib.pyplot as plt

obs = env.reset()
print(f"The intial observation is {obs}")

random_action = env.action_space.sample()

new_obs, reward, done, truncated, info = env.step(random_action)
print(f"The new observation is: {new_obs}")

The intial observation is (array([-0.43551907,  0.        ], dtype=float32), {})
The new observation is: [-0.437172   -0.00165294]


In [9]:
env_screen = env.render()
plt.imshow(env_screen)

DependencyNotInstalled: pygame is not installed, run `pip install gym[classic_control]`

Complete code for a trail run

In [None]:
import time
import gym

env = gym.make('MountainCar-v0', render_mode='human')
num_steps = 100
obs = env.reset()
for step in range(num_steps):
    action = env.action_space.sample()
    obs, reward, done, terminated, info = env.step(action)

    env.render()

    print("Step->",step,"and Action ",action)
    time.sleep(0.001)

    if done:
        env.reset()

env.close()

Step-> 0 and Action  0
Step-> 1 and Action  0
Step-> 2 and Action  0
Step-> 3 and Action  0
Step-> 4 and Action  1
Step-> 5 and Action  1
Step-> 6 and Action  0
Step-> 7 and Action  0
Step-> 8 and Action  0
Step-> 9 and Action  2
Step-> 10 and Action  0
Step-> 11 and Action  1
Step-> 12 and Action  0
Step-> 13 and Action  0
Step-> 14 and Action  1
Step-> 15 and Action  0
Step-> 16 and Action  2
Step-> 17 and Action  1
Step-> 18 and Action  2
Step-> 19 and Action  2
Step-> 20 and Action  2
Step-> 21 and Action  2
Step-> 22 and Action  0
Step-> 23 and Action  2
Step-> 24 and Action  2
Step-> 25 and Action  0
Step-> 26 and Action  2
Step-> 27 and Action  1
Step-> 28 and Action  2
Step-> 29 and Action  2
Step-> 30 and Action  2
Step-> 31 and Action  1
Step-> 32 and Action  2
Step-> 33 and Action  2
Step-> 34 and Action  2
Step-> 35 and Action  1
Step-> 36 and Action  1
Step-> 37 and Action  1
Step-> 38 and Action  0
Step-> 39 and Action  1
Step-> 40 and Action  1
Step-> 41 and Action  0
St

Spaces

In [None]:
print(f"Upper Bound for Env Observation: {env.observation_space.high}")
print(f"Lower Bound for Env Observation: {env.observation_space.low}")

Upper Bound for Env Observation: [0.6  0.07]
Lower Bound for Env Observation: [-1.2  -0.07]


Wrappers

In [None]:
env = gym.make("BreakoutNoFrameskip-v4", render_mode='human')

print(f"Observation Space: {env.observation_space}")
print(f"Action Space: {env.action_space}")

obs = env.reset()

for i in range(100):
    action = env.action_space.sample()
    obs, reward, done, terminated, info = env.step(action)
    env.render()
    time.sleep(0.01)
    if done:
        env.reset()
env.close()

In [None]:
from collections import deque
from gym import spaces
import numpy as np

class ConcatObs(gym.Wrapper):
    def __init__(self,env, k):
        gym.Wrapper.__init__(self, env)
        self.k = k
        self.frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        self.observation_space = spaces.Box(low=0, high=255, shape=((k,) + shp), dtype=env.observation_space.dtype)
    
    def reset(self):
        ob, _ = self.env.reset()
        for _ in range(self.k):
            self.frames.append(ob)
        return self._get_ob()
    
    
    def step(self, action):
        ob, reward, done, terminated, info = self.env.step(action)
        self.frames.append(ob)
        return self._get_ob(), reward, done, terminated, info
    
    def _get_ob(self):
        return np.array(self.frames)

In [None]:
env = gym.make("BreakoutNoFrameskip-v4")
wrapped_env = ConcatObs(env, 4)
print(f"The new observation space is: {wrapped_env.observation_space}")

In [None]:
obs = wrapped_env.reset()
print(f"Initial obs is of the shape: {obs.shape})")

obs, _, _, _, _ = wrapped_env.step(2)
print(f"Obs after taking a step is {obs.shape}")

Documentation Lookup

In [None]:
help(env.reset)

Help on method reset in module gym.wrappers.time_limit:

reset(**kwargs) method of gym.wrappers.time_limit.TimeLimit instance
    Resets the environment with :param:`**kwargs` and sets the number of steps elapsed to zero.
    
    Args:
        **kwargs: The kwargs to reset the environment with
    
    Returns:
        The reset environment



In [None]:
help(env.step)

Help on method step in module gym.wrappers.time_limit:

step(action) method of gym.wrappers.time_limit.TimeLimit instance
    Steps through the environment and if the number of steps elapsed exceeds ``max_episode_steps`` then truncate.
    
    Args:
        action: The environment step action
    
    Returns:
        The environment step ``(observation, reward, terminated, truncated, info)`` with `truncated=True`
        if the number of steps elapsed >= max episode steps

