<a href="https://colab.research.google.com/github/irvinec/mini-assignment-1/blob/master/casey/RandomPolicyAgent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Casey's answer to mini assignment 1.

## Install Dependencies

In [0]:
# Refresh the apt package index first so every apt-get install below resolves
# against current package lists.
!apt-get update

!pip install gym pyvirtualdisplay
!apt-get install -y xvfb python-opengl ffmpeg

# -y keeps the install non-interactive; without it apt can stop at a
# confirmation prompt that a notebook cell cannot answer.
!apt-get install -y cmake
!pip install --upgrade setuptools
!pip install ez_setup
!pip install gym[atari]
!pip install git+https://github.com/A-Maze-Ball/random-agent.git

## The Code

In [10]:
import os
import warnings
import gym

from gym import logger as gymlogger
from gym.wrappers import Monitor
import math
import glob
import io
import base64
from IPython.display import HTML
from IPython import display as ipythondisplay
from random_agent import RandomAgent

# Restrict gym's logger to level 40 (errors only).
gymlogger.set_level(40) #error only

# Set up and start a virtual display so rendering has a screen to draw to.
# NOTE(review): presumably backed by the xvfb package installed in the cell
# above -- confirm.
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1400, 900))
display.start()

# gym has an annoying warning that we need to get rid of.
# Be aware that this filter silences every Python warning, not only gym's.
warnings.simplefilter("ignore")

# Helpers for rendering
def show_video():
    """Embed the most recent recording from ``video/`` in the notebook output.

    Searches ``video/*.mp4`` relative to the current working directory. If at
    least one file matches, the newest one (by modification time) is read,
    base64-encoded and displayed as an inline, looping HTML5 ``<video>``
    element. If nothing matches, prints "Could not find video".
    """
    mp4list = glob.glob('video/*.mp4')
    if not mp4list:
        print("Could not find video")
        return

    # glob returns matches in arbitrary order, so choose the newest file
    # explicitly rather than whichever happens to come first.
    mp4 = max(mp4list, key=os.path.getmtime)

    # Read-only binary mode is sufficient, and the context manager guarantees
    # the file handle is closed after reading.
    with open(mp4, 'rb') as video_file:
        video = video_file.read()

    encoded = base64.b64encode(video)
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    
def wrap_env(env):
    """Return ``env`` wrapped in a gym ``Monitor`` that records into ``./video``.

    ``force=True`` is passed to ``Monitor``; per gym's Monitor API this allows
    it to clear earlier monitor files in that directory.
    """
    return Monitor(env, './video', force=True)

# Number of episodes passed to each agent's train() call in main().
NUM_EPISODES = 1000  #@param {type: "number"}
# When True, training is rendered and main() shows a video after each training run.
RENDER_TRAINING = False #@param ["True", "False"] {type:"raw"}
# Environment id handed to gym.make() in main().
ENV_NAME = 'MountainCar-v0'

class RandomPolicyAgent(object):
    """Agent that looks for a good policy by trying random per-state actions.

    Every training episode builds a fresh policy: the first time a bucketized
    state is seen in that episode a random action is sampled and then reused
    for the rest of the episode. The policy of the best-scoring episode is
    merged into ``best_policy``, which ``play`` follows afterwards.
    """

    def __init__(self, env):
        """Store the environment and start with no recorded best episode.

        Args:
            env: Environment exposing ``reset()``, ``step(action)``,
                ``render()`` and ``action_space.sample()``.
        """
        super().__init__()
        self.env = env
        # Highest total episode reward seen so far; None until one episode ends.
        self.best_reward = None
        # Maps a bucketized state to the action chosen for it.
        self.best_policy = dict()
    
    def train(self, num_episodes, render_training):
        """Run ``num_episodes`` random-policy episodes and keep the best one.

        Args:
            num_episodes: Number of episodes to play.
            render_training: When true, ``render()`` is called on the
                environment after every reset and every step.

        Prints the best total reward seen once all episodes have finished.
        """
        for _ in range(num_episodes):
            state = bucketize(self.env.reset())
            if render_training:
                self.env.render()

            # Play an episode
            done = False
            total_reward = 0
            policy = dict()
            while not done:
                # Within one episode a state always maps to the same action.
                if state in policy:
                    action = policy[state]
                else:
                    action = self.env.action_space.sample()
                    policy[state] = action

                # States the best policy has never seen get this episode's
                # action as a provisional entry, even if the episode does not
                # turn out to be the best one.
                if state not in self.best_policy:
                    self.best_policy[state] = action

                obs, reward, done, _ = self.env.step(action)
                if render_training:
                    # Must go through self.env: there is no module-level
                    # ``env`` name, so a bare ``env.render()`` raises NameError.
                    self.env.render()
                    
                state = bucketize(obs)
                total_reward += reward

            # A strictly better episode overrides the stored actions for every
            # state it visited.
            if self.best_reward is None or total_reward > self.best_reward:
                self.best_reward = total_reward
                self.best_policy.update(policy)

        print(f'Best Reward during training: {self.best_reward}')

    def play(self):
        """Play one rendered episode following ``best_policy``.

        States missing from ``best_policy`` fall back to a randomly sampled
        action; that action is not stored.

        Returns:
            The total reward accumulated over the episode.
        """
        state = bucketize(self.env.reset())
        self.env.render()
        total_reward = 0
        done = False
        while not done:
            if state in self.best_policy:
                action = self.best_policy[state]
            else:
                action = self.env.action_space.sample()

            obs, reward, done, _ = self.env.step(action)
            self.env.render()
            state = bucketize(obs)
            total_reward += reward

        return total_reward
    
def bucketize(state, precision=2):
    """Discretize an observation into a hashable key for policy dictionaries.

    Args:
        state: Iterable of numeric observation components.
        precision: Number of decimal places each component is rounded to.

    Returns:
        A tuple of the rounded components, in their original order.
    """
    # A tuple is hashable like a frozenset but keeps component order and
    # repeated values, so observations such as (0.3, -0.1) and (-0.1, 0.3)
    # no longer collapse into the same key.
    return tuple(round(component, precision) for component in state)

def main():
    """Train and play a RandomAgent and a RandomPolicyAgent on ``ENV_NAME``.

    Both agents share one Monitor-wrapped environment. Each is trained for
    ``NUM_EPISODES`` episodes (a video is shown after training only when
    ``RENDER_TRAINING`` is set), then each plays one episode whose reward is
    printed and followed by a call to ``show_video``. The environment is
    closed when the function exits, including on error.
    """
    env = wrap_env(gym.make(ENV_NAME))
    try:
        random_agent = RandomAgent(env)
        print('Training RandomAgent')
        random_agent.train(NUM_EPISODES, RENDER_TRAINING)
        if RENDER_TRAINING:
            show_video()

        random_policy_agent = RandomPolicyAgent(env)
        print('Training RandomPolicyAgent')
        random_policy_agent.train(NUM_EPISODES, RENDER_TRAINING)
        if RENDER_TRAINING:
            show_video()

        # NOTE(review): the Monitor may only finalize an episode's recording on
        # the next reset() or on close(), so the video shown right after
        # play() might not be that episode's -- confirm.
        print('Playing with RandomAgent')
        reward = random_agent.play()
        print(f'Reward from playing with RandomAgent: {reward}')
        show_video()
        print('Playing with RandomPolicyAgent')
        reward = random_policy_agent.play()
        print(f'Reward from playing with RandomPolicyAgent: {reward}')
        show_video()
    finally:
        # Release the environment (and anything the Monitor wrapper holds
        # open) even if training or playing fails.
        env.close()

# Run the full train-and-play comparison when this cell executes.
main()



Training RandomAgent
Best reward during training: -200.0
Training RandomPolicyAgent
State: frozenset({-0.5, 0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.5, -0.0})
State: frozenset({-0.6, -0.0})
State: frozenset({-0.6, -0.0})
State: frozenset({-0.6, -0.0})
State: frozenset({-0.6, -0.0})
State: frozenset({-0.6, -0.0})
State: frozenset({

Playing with RandomPolicyAgent
Reward from playing with RandomPolicyAgent: -200.0
