## Install Dependencies

In [1]:
# This is for installing our environment dependencies
!pip install gym pyvirtualdisplay
!apt-get install -y xvfb python-opengl ffmpeg

!apt-get update
!apt-get install cmake
!pip install --upgrade setuptools
!pip install ez_setup
!pip install gym[atari]

Reading package lists... Done
Building dependency tree       
Reading state information... Done
python-opengl is already the newest version (3.1.0+dfsg-1).
ffmpeg is already the newest version (7:3.4.4-0ubuntu0.18.04.1).
xvfb is already the newest version (2:1.19.6-1ubuntu4.2).
0 upgraded, 0 newly installed, 0 to remove and 62 not upgraded.
Ign:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1710/x86_64  InRelease
Hit:2 http://security.ubuntu.com/ubuntu bionic-security InRelease
Ign:3 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64  InRelease
Hit:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1710/x86_64  Release
Hit:5 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64  Release
Hit:6 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease
Hit:8 http://archive.ubuntu.com/ubuntu bionic InRelease
Hit:10 http://archive.ubuntu.com/ubuntu bionic-updates InRelease
Hit:11

## The Code

Check out the list of gym environments [here](https://github.com/openai/gym/wiki/Table-of-environments).

In [17]:
import os
import warnings
import gym

from gym import logger as gymlogger
from gym.wrappers import Monitor
import math
import glob
import io
import base64
from IPython.display import HTML
from IPython import display as ipythondisplay

gymlogger.set_level(40) #error only

# setup the display
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1400, 900))
display.start()

# gym has an annoying warning that we need to get rid of.
warnings.simplefilter("ignore")

NUM_EPISODES = 3  #@param {type: "number"}
RENDER_TRAINING = False #@param ["True", "False"] {type:"raw"}
ENV_NAME = 'SpaceInvaders-ram-v0' #@param {type: "string"}

def main():
  env = wrap_env(gym.make(ENV_NAME))
  random_agent = RandomAgent(env)
  random_agent.train(NUM_EPISODES, RENDER_TRAINING)

  reward = random_agent.play()
  print(f'Reward from playing: {reward}')

class RandomAgent(object):
  def __init__(self, env):
    super().__init__()
    self.env = env
    self.best_reward = None
    self.best_actions = []
    
  def train(self, num_episodes, render_training=False):
    for _ in range(num_episodes):
      initial_state = self.env.reset()
      if render_training:
        self.env.render()
      
      # Play an episode
      done = False
      total_reward = 0
      actions = []
      while not done:
        action = self.env.action_space.sample()
        actions.append(action)
        new_state, reward, done, _ = self.env.step(action)
        if render_training:
            self.env.render()
            
        total_reward += reward
      
      # check if we need to update our best stuff
      if self.best_reward is None or self.best_reward < total_reward:
        self.best_reward = total_reward
        self.best_actions = actions

      if render_training:
        show_video()

    print(f'Best Reward during training: {self.best_reward}')
          
  def play(self):
    self.env.reset()
    self.env.render()
    total_reward = 0
    for action in self.best_actions:
      _, reward, done, _ = self.env.step(action)
      self.env.render()
      total_reward += reward
      # if we are done exit the loop
      if done:
        break
    
    show_video()
    return total_reward

# Helpers for rendering
def show_video():
  mp4list = glob.glob('video/*.mp4')
  if len(mp4list) > 0:
    mp4 = mp4list[0]
    video = io.open(mp4, 'r+b').read()
    encoded = base64.b64encode(video)
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
  else: 
    print("Could not find video")
    
def wrap_env(env):
  env = Monitor(env, './video', force=True)
  return env

# Call main at the end of the cell
main()


Best Reward during training: 155.0


Reward from playing: 120.0
