In [1]:
import gymnasium as gym
from gymnasium.utils import play
import matplotlib.pyplot as plt
import numpy as np
import time

In [2]:
### Investigate the action space and observation space ###

env_name = 'MountainCar-v0'
env = gym.make(env_name, render_mode='rgb_array')

# Reset with a fixed seed and keep the first observation and the info dict.
observation, info = env.reset(seed=42)

# Report, one item per line: reset info, first observation, and both spaces.
# observation is np.array([position, velocity]).
print(
    f'{info=}',
    f'{observation=}',
    f'{env.observation_space=}',
    f'{env.action_space=}',
    sep='\n',
)
# Meaning of the discrete actions:
#   0: Accelerate left
#   1: Don't accelerate
#   2: Accelerate right

env.close()

info={}
observation=array([-0.4452088,  0.       ], dtype=float32)
env.observation_space=Box([-1.2  -0.07], [0.6  0.07], (2,), float32)
env.action_space=Discrete(3)


In [None]:
### Hard coding an action: Always accelerate to the right ###

env_name = 'MountainCar-v0'
# With render_mode='human' Gymnasium draws the window itself on every
# reset()/step() (see the Gymnasium render-mode docs), so the loop below does
# not call env.render(); doing so would draw each frame a second time.
env = gym.make(env_name, render_mode='human')

try:
    observation, info = env.reset(seed=42)

    for step in range(200):
        action = 2  # 2: Accelerate right
        # env.step returns (observation, reward, terminated, truncated, info);
        # `done` holds the terminated flag.
        observation, reward, done, truncated, info = env.step(action)
        print(f'{step=} | {action=} | {observation=} | {reward=} | {done=} | {truncated=} | {info=}')
        time.sleep(0.02) # Slow down steps to observe the rendering

        # Stop as soon as the episode ends, whether terminated or truncated.
        if done or truncated:
            break
finally:
    # Close the environment (and its render window) even when the cell is
    # interrupted or a step raises.
    env.close()

step=0 | action=2 | observation=array([-4.4479132e-01,  4.1747934e-04], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=1 | action=2 | observation=array([-0.4439594 ,  0.00083191], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=2 | action=2 | observation=array([-0.4427191 ,  0.00124029], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=3 | action=2 | observation=array([-0.4410795 ,  0.00163962], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=4 | action=2 | observation=array([-0.43905246,  0.00202703], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=5 | action=2 | observation=array([-0.43665275,  0.00239971], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=6 | action=2 | observation=array([-0.43389776,  0.00275498], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=7 | action=2 | observation=array([-0.

In [5]:
### Creating a simple agent that takes in observation and returns an action based on basic if/else logic ###

# Build the environment with the on-screen ('human') renderer, then take the
# starting observation from a fixed-seed reset.
env_name = 'MountainCar-v0'
env = gym.make(id=env_name, render_mode='human')
observation, info = env.reset(seed=42)

def simple_agent(observation):
    """Choose an action from the sign of the current velocity.

    Args:
        observation: Two-element sequence ``(position, velocity)``; only the
            velocity is used.

    Returns:
        int: ``0`` (accelerate left) when velocity is negative, ``2``
        (accelerate right) when it is positive, and ``1`` (no acceleration)
        otherwise.
    """
    _, velocity = observation

    # Push in whichever direction the cart is already moving.
    if velocity < 0:
        return 0
    if velocity > 0:
        return 2
    # Neither negative nor positive: do not accelerate.
    return 1

# The environment was created with render_mode='human', in which Gymnasium
# draws a frame on every step() by itself (see the Gymnasium render-mode docs),
# so the loop does not call env.render(); doing so would draw each frame twice.
try:
    for step in range(200):
        action = simple_agent(observation)
        # env.step returns (observation, reward, terminated, truncated, info);
        # `done` holds the terminated flag.
        observation, reward, done, truncated, info = env.step(action)
        print(f'{step=} | {action=} | {observation=} | {reward=} | {done=} | {truncated=} | {info=}')
        time.sleep(0.02) # Slow down steps to observe the rendering

        # Stop as soon as the episode ends, whether terminated or truncated.
        if done or truncated:
            break
finally:
    # Close the environment (and its render window) even when the cell is
    # interrupted or a step raises.
    env.close()

step=0 | action=1 | observation=array([-0.4457913 , -0.00058252], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=1 | action=0 | observation=array([-0.4479521 , -0.00216079], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=2 | action=0 | observation=array([-0.4516754 , -0.00372328], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=3 | action=0 | observation=array([-0.45693392, -0.00525853], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=4 | action=0 | observation=array([-0.46368912, -0.00675519], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=5 | action=0 | observation=array([-0.47189122, -0.0082021 ], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=6 | action=0 | observation=array([-0.4814796 , -0.00958835], dtype=float32) | reward=-1.0 | done=False | truncated=False | info={}
step=7 | action=0 | observation=array([-0.492382