In [None]:
import gym

In [None]:
# Create the Taxi-v3 environment through gym's registry and keep it in `env`,
# which every Taxi cell below operates on.
env = gym.make("Taxi-v3")

In [None]:
# Start a new episode. As the last expression in the cell, the value returned
# by reset() is shown as the cell output.
# NOTE(review): no seed is set in the visible cells, while a later cell replays
# a fixed action list — confirm which starting state that list was written for.
env.reset()

In [None]:
# Render the current state of the environment so the problem can be inspected.
env.render()

R, G, Y, B are the possible pickup and destination locations. The blue letter represents the current passenger pick-up location, and the purple letter is the current destination.

In [None]:
# Show which actions the agent can take in this environment.
print(f"Action Space {env.action_space}")

### Action Space Inference
The 6 discrete options are:
* 0 = south
* 1 = north
* 2 = east
* 3 = west
* 4 = pickup
* 5 = dropoff

In [None]:
# Show which observations (states) the environment can produce.
print(f"State Space {env.observation_space}")

### Observation space inference
The 500 discrete options represent an encoding of the taxi's location, the passenger's location, and the destination location.

In [None]:
# Attempt to move the taxi west (action 3 in the action list above) and
# inspect the four values returned by step().
state, reward, done, info = env.step(3)
print(f"State: {state}")
print(f"Reward: {reward}")
print(f"Done: {done}")
print(f"Info: {info}")

In [None]:
# Render again to see the environment after the step taken above.
env.render()

In [None]:
# Scripted action sequence: north, west, south, south, then pickup
# (codes as given in the action list above).
# NOTE(review): presumably written for one particular starting state — confirm
# it still reaches the passenger after a fresh reset.
pickup = [1, 3, 0, 0, 4]
for action in pickup:
    # Apply the action, report what step() returned, then redraw the grid.
    state, reward, done, info = env.step(action)
    print(f"State: {state}")
    print(f"Reward: {reward}")
    print(f"Done: {done}")
    print(f"Info: {info}")
    env.render()

In [None]:
# This is where we use the Q-learning algorithm.

### CartPole Example

In [None]:
# Rendering in Colab requires extra work: install the Python packages (gym,
# pyvirtualdisplay) and the system packages (xvfb, python-opengl, ffmpeg).
# NOTE(review): gym is installed unpinned, while later cells import
# gym.wrappers.Monitor and unpack four values from env.step() — confirm the
# installed gym release still provides both.
!pip install gym pyvirtualdisplay
!apt-get install -y xvfb python-opengl ffmpeg

In [None]:
from gym.wrappers import Monitor
import glob
import io
import base64
from IPython.display import HTML
from pyvirtualdisplay import Display
from IPython import display as ipythondisplay

# Create and start a pyvirtualdisplay Display with visible=0 and a 1400x900
# screen. The name `display` refers to this object; IPython's display module
# is imported as `ipythondisplay` above, so the two names do not collide.
display = Display(visible=0, size=(1400, 900))
display.start()

"""
Utility functions to enable video recording of gym environment 
and displaying it.
To enable video, just do "env = wrap_env(env)".
"""

def show_video(video_dir='video'):
  """Embed the first recorded .mp4 from `video_dir` in the notebook output.

  The matches are sorted so the same file is chosen on every run. The file is
  read in binary mode, base64-encoded, and displayed as an inline HTML
  <video> element.

  Args:
    video_dir: Directory searched for '*.mp4' files. Defaults to 'video'.

  Returns:
    None. Prints "Could not find video" when no .mp4 file is present.
  """
  mp4list = sorted(glob.glob(video_dir + '/*.mp4'))
  if not mp4list:
    print("Could not find video")
    return

  # Read-only binary mode is sufficient; the context manager guarantees the
  # file handle is closed even if reading fails.
  with open(mp4list[0], 'rb') as video_file:
    encoded = base64.b64encode(video_file.read())

  ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    

def wrap_env(env):
  """Return `env` wrapped in a gym Monitor that writes to './video'.

  force=True is passed straight through to Monitor (presumably so earlier
  recordings in that directory are overwritten — confirm against gym's docs).
  """
  return Monitor(env, './video', force=True)

In [None]:
# Load the gym environment for this section, wrapped with wrap_env so the
# episode is recorded to video (the approach used here for rendering in Colab).
# CartPole-v1 matches the section heading and the action/observation notes in
# the cells below; call gym.make("CartPole-v1") without wrap_env for the
# default, unrecorded environment.
env = wrap_env(gym.make("CartPole-v1"))

In [None]:
# Start a new episode and keep the initial observation returned by reset().
observation = env.reset()

In [None]:
# Visualize the problem: play one episode with randomly sampled actions,
# then close the environment and display the recorded video.
observation = env.reset()

done = False
while not done:
    env.render()

    # Placeholder policy — replace the random sample with your agent's choice.
    action = env.action_space.sample()

    observation, reward, done, info = env.step(action)

env.close()
show_video()

In [None]:
# Reference: https://github.com/openai/gym/wiki
# The notes below use CartPole's cart/pole terms; they do not apply if a
# different environment is loaded.
# Actions: move left or right.
print(f"Action Space {env.action_space}")
# Observations: [position of cart, velocity of cart, angle of pole, rotation rate of pole]
print(f"State Space {env.observation_space}")

In [None]:
# The random rollout above ran until done and then closed the environment, so
# start a fresh episode first: gym treats step() after done as undefined
# behaviour until reset() is called.
env.reset()

# Take a single step with action 0 and inspect the four values step() returns.
state, reward, done, info = env.step(0)
print(f"State: {state}")
print(f"Reward: {reward}")
print(f"Done: {done}")
print(f"Info: {info}")

In [None]:
# Repeat action 0 for up to 10 steps, printing the result of each step.
for i in range(10):
    state, reward, done, info = env.step(0)
    print(f"State: {state}")
    print(f"Reward: {reward}")
    print(f"Done: {done}")
    print(f"Info: {info}")
    # Stop once the episode ends: stepping past done is undefined behaviour
    # in gym until reset() is called.
    if done:
        break

### You can't keep track of every possible observation value here, because the observations are continuous rather than a small set of discrete states.
### This is where DQN comes into play.