In [37]:
# Install the environment dependencies for this notebook:
#   - gym and pyvirtualdisplay via pip,
#   - xvfb, python-opengl and ffmpeg via apt-get (-y answers the prompts),
#   - pyglet and pyopengl via pip (-q keeps pip's output quiet).
# The leading "!" hands each line to the system shell instead of Python.
!pip install gym pyvirtualdisplay
!apt-get install -y xvfb python-opengl ffmpeg
!pip -q install pyglet
!pip -q install pyopengl

Reading package lists... Done
Building dependency tree       
Reading state information... Done
python-opengl is already the newest version (3.1.0+dfsg-1).
ffmpeg is already the newest version (7:3.4.4-0ubuntu0.18.04.1).
xvfb is already the newest version (2:1.19.6-1ubuntu4.2).
0 upgraded, 0 newly installed, 0 to remove and 13 not upgraded.
Reading package lists... Done
Building dependency tree       
Reading state information... Done
python-opengl is already the newest version (3.1.0+dfsg-1).
0 upgraded, 0 newly installed, 0 to remove and 13 not upgraded.


In [3]:
import numpy
import gym
import warnings
import os

# Getting rendering to work
# from pyvirtualdisplay import Display
# display = Display(visible=0, size=(1024, 768))
# display.start()
# import os
# os.environ["DISPLAY"] = ":" + str(display.display) + "." + str(display.screen)

# Suppress every warning; importing/using gym otherwise emits a noisy one.
warnings.simplefilter(action="ignore")

# Experiment settings read by main().
ENV_NAME = 'Taxi-v2'       # id handed to gym.make
NUM_EPISODES = 100         # number of training episodes
RENDER_TRAINING = False    # whether to render while training

def main():
  """Train a RandomAgent on ENV_NAME, replay its best episode, print the reward.

  Creates the environment with gym.make, trains for NUM_EPISODES episodes
  (rendering only if RENDER_TRAINING is set), then calls play() and prints
  the total reward it returns.
  """
  env = gym.make(ENV_NAME)
  try:
    random_agent = RandomAgent(env)
    random_agent.train(NUM_EPISODES, RENDER_TRAINING)

    reward = random_agent.play()
    print(f'Reward: {reward}')
  finally:
    # Close the environment even if training or replay raises, so that a
    # long-lived notebook kernel does not keep it open after the cell ends.
    env.close()

class RandomAgent(object):
  """Agent that plays random episodes and remembers the best-scoring one.

  train() plays episodes using actions drawn from env.action_space.sample()
  and keeps the action sequence, total reward and starting state of the
  episode with the highest total reward. play() replays that sequence.
  """

  def __init__(self, env):
    """Store the environment and start with no recorded episode.

    Args:
      env: Object providing reset(), render(), action_space.sample() and
        step(action) returning a 4-tuple (state, reward, done, info).
    """
    super().__init__()
    self.env = env
    # Highest total reward seen in any training episode (None before training).
    self.best_reward = None
    # Actions taken, in order, during the best training episode.
    self.best_actions = []
    # Value returned by env.reset() at the start of the best training episode.
    self.best_initial_state = None

  def train(self, num_episodes, render_training=False):
    """Play num_episodes random episodes and record the best one.

    Args:
      num_episodes: Number of episodes to play.
      render_training: If true, call env.render() after reset and after
        every step.

    Prints the best total reward found so far when finished.
    """
    for _ in range(num_episodes):
      initial_state = self.env.reset()
      if render_training:
        self.env.render()

      # Play one episode with uniformly sampled actions until the env
      # reports that it is done.
      done = False
      total_reward = 0
      actions = []
      while not done:
        action = self.env.action_space.sample()
        actions.append(action)
        _, reward, done, _ = self.env.step(action)
        if render_training:
          self.env.render()
        total_reward += reward

      # Keep this episode if it is the first one or beats the best so far.
      if self.best_reward is None or self.best_reward < total_reward:
        self.best_reward = total_reward
        self.best_actions = actions
        # Remember where the episode started so play() can replay it from
        # the same state instead of whatever reset() happens to return.
        self.best_initial_state = initial_state
    print(f'Best Reward during training: {self.best_reward}')

  def play(self):
    """Replay the best recorded action sequence, rendering every step.

    The environment is reset and, where possible, put back into the state
    the recorded episode started from (see _restore_initial_state). Replay
    stops as soon as the environment reports done, so a finished episode is
    never stepped further.

    Returns:
      The sum of the rewards collected during the replay (0 if no episode
      has been recorded).
    """
    self.env.reset()
    self._restore_initial_state()
    self.env.render()
    total_reward = 0
    for action in self.best_actions:
      _, reward, done, _ = self.env.step(action)
      self.env.render()
      total_reward += reward
      if done:
        break
    return total_reward

  def _restore_initial_state(self):
    """Best-effort: put the env back into the recorded starting state.

    Only acts when a starting state was recorded and the underlying
    environment has an `s` attribute.
    NOTE(review): this assumes `s` holds the current state and that reset()
    returned that same value, as in gym's discrete toy-text environments
    such as Taxi - confirm before relying on it for other environments.
    When the attribute is missing nothing is changed and the replay starts
    from whatever state reset() produced.
    """
    if self.best_initial_state is None:
      return
    # Write to the unwrapped env: assigning on a wrapper would only create
    # an attribute on the wrapper itself.
    base_env = getattr(self.env, 'unwrapped', self.env)
    if hasattr(base_env, 's'):
      base_env.s = self.best_initial_state

# Call main at the end of the cell: main() uses RandomAgent, so the call has
# to run after the class definition above has been executed.
main()


Best Reward during training: -103
+---------+
|[35mR[0m: | : :G|
| : : : : |
| : : :[43m [0m: |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+

+---------+
|[35mR[0m: | : :G|
| : : :[43m [0m: |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
+---------+
|[35mR[0m: | : :G|
| : : :[43m [0m: |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (Dropoff)
+---------+
|[35mR[0m: | : :G|
| : : :[43m [0m: |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (Pickup)
+---------+
|[35mR[0m: | : :G|
| : : : :[43m [0m|
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (East)
+---------+
|[35mR[0m: | : :G|
| : : : :[43m [0m|
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (East)
+---------+
|[35mR[0m: | : :G|
| : : : : |
| : : : :[43m [0m|
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (South)
+---------+
|[35mR[0m: | : :G|
| : : : :[43m [0m|
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
