<a href="https://colab.research.google.com/github/irvinec/mini-assignment-1/blob/master/casey/RandomPolicyAgent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Casey's answer to mini assignment 1

## Install Dependencies

In [1]:
# System packages: refresh the package index *before* installing so apt does
# not resolve against a stale list, and pass -y everywhere so no install can
# stall on an interactive confirmation prompt.
!apt-get update
!apt-get install -y xvfb python-opengl ffmpeg cmake

# Python packages. The extras specifier is quoted so the shell never treats
# the square brackets as a glob pattern.
!pip install --upgrade setuptools
!pip install ez_setup
!pip install gym pyvirtualdisplay
!pip install "gym[atari]"
!pip install git+https://github.com/A-Maze-Ball/random-agent.git

Collecting pyvirtualdisplay
  Downloading https://files.pythonhosted.org/packages/39/37/f285403a09cc261c56b6574baace1bdcf4b8c7428c8a7239cbba137bc0eb/PyVirtualDisplay-0.2.1.tar.gz
Collecting EasyProcess (from pyvirtualdisplay)
  Downloading https://files.pythonhosted.org/packages/45/3a/4eecc0c7995a13a64739bbedc0d3691fc574245b7e79cff81905aa0c2b38/EasyProcess-0.2.5.tar.gz
Building wheels for collected packages: pyvirtualdisplay, EasyProcess
  Running setup.py bdist_wheel for pyvirtualdisplay ... [?25l- \ | / done
[?25h  Stored in directory: /root/.cache/pip/wheels/d1/8c/16/1c64227974ae29c687e4cc30fd691d5c0fd40f54446dde99da
  Running setup.py bdist_wheel for EasyProcess ... [?25l- \ | / done
[?25h  Stored in directory: /root/.cache/pip/wheels/41/22/19/af15ef6264c58b625a82641ed7483ad05e258fbd8925505227
Successfully built pyvirtualdisplay EasyProcess
Installing collected packages: EasyProcess, pyvirtualdisplay
Successfully installed EasyProcess-0.2.5 pyvirtualdisplay-0.2.

## The Code

In [3]:
import os
import warnings
import gym

from gym import logger as gymlogger
from gym.wrappers import Monitor
import math
import glob
import io
import base64
from IPython.display import HTML
from IPython import display as ipythondisplay
from random_agent import RandomAgent

# Restrict gym's logger to level 40 (errors only).
gymlogger.set_level(40)

# Start a display sized 1400x900 with visible=0 so that env.render() has
# something to draw to on a headless machine.
# NOTE(review): presumably backed by the xvfb package installed in the first
# cell - confirm.
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1400, 900))
display.start()

# gym emits a noisy warning we do not want in the output. Be aware that this
# filter ignores *every* Python warning for the rest of the session, not only
# the one coming from gym.
warnings.simplefilter("ignore")

# Helpers for rendering
def show_video():
    """Embed a recorded episode video inline in the notebook output.

    Globs ``video/*.mp4`` (the directory ``wrap_env`` records into), takes the
    first path the glob returns, base64-encodes its bytes and displays them in
    an HTML ``<video>`` element. Prints ``Could not find video`` when the glob
    matches nothing.
    """
    mp4list = glob.glob('video/*.mp4')
    if not mp4list:
        print("Could not find video")
        return

    # Open read-only ('rb' rather than 'r+b', which demanded write access for
    # no reason) and inside a context manager so the handle is always closed.
    with open(mp4list[0], 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    
def wrap_env(env):
    """Return ``env`` wrapped in a ``Monitor`` that writes to ``./video``.

    ``force=True`` is forwarded to ``Monitor`` as-is.
    NOTE(review): in gym this presumably allows earlier recordings in the
    directory to be overwritten - confirm against the Monitor docs.
    """
    return Monitor(env, './video', force=True)

# Notebook configuration. The trailing `#@param` comments are Colab form
# annotations and must stay on the same line as the assignment they describe.

# Number of training episodes passed to each agent's train().
NUM_EPISODES = 10000  #@param {type: "number"}
# Number of evaluation episodes played with each trained agent.
NUM_PLAY_EPISODES = 1000 #@param {type: "number"}
# Whether to render the environment while training.
RENDER_TRAINING = False #@param ["True", "False"] {type:"raw"}
# Whether to render the environment while playing.
RENDER_PLAY = False #@param ["True", "False"] {type:"raw"}
# Id handed to gym.make() in main().
ENV_NAME = 'FrozenLake-v0'
# When True, main() wraps the environment for recording and calls
# show_video() after any phase that was rendered.
SHOW_VIDEO = False


class RandomPolicyAgent(object):
    """Agent that looks for a good state -> action table by random trial.

    During training every episode draws a fresh random policy lazily (an
    action is sampled the first time a state is visited and reused for the
    rest of that episode). The policy of any episode that beats the best total
    reward seen so far is merged into ``best_policy``.

    The environment must provide ``reset()``, ``step(action)`` returning a
    ``(state, reward, done, info)`` 4-tuple, ``render()`` and
    ``action_space.sample()``. States are used as dict keys, so they must be
    hashable.

    Attributes:
        env: The environment the agent interacts with.
        best_reward: Highest episode reward seen in training, or ``None``
            before any episode has finished.
        best_policy: Dict mapping state -> action used by ``play``.
    """

    def __init__(self, env):
        """Store ``env`` and start with an empty best policy."""
        super().__init__()
        self.env = env
        self.best_reward = None
        self.best_policy = dict()

    def train(self, num_episodes, render_training):
        """Run ``num_episodes`` random-policy episodes and keep the best one.

        Args:
            num_episodes: Number of episodes to play.
            render_training: When truthy, render the environment after the
                reset and after every step.

        Prints the best episode reward once training has finished.
        """
        for _ in range(num_episodes):
            state = self.env.reset()
            if render_training:
                self.env.render()

            # Play one episode with a policy that is sampled on demand.
            done = False
            total_reward = 0
            policy = dict()
            while not done:
                if state in policy:
                    action = policy[state]
                else:
                    action = self.env.action_space.sample()
                    policy[state] = action

                # Seed the best policy with *some* action for every state ever
                # visited, so play() has an entry even for states that never
                # appeared in a record-setting episode.
                self.best_policy.setdefault(state, action)

                state, reward, done, _ = self.env.step(action)
                if render_training:
                    # Must go through self.env: there is no module-level `env`,
                    # so a bare `env.render()` raises NameError here.
                    self.env.render()

                total_reward += reward

            # Strictly-better episodes overwrite the entries they touched.
            if self.best_reward is None or total_reward > self.best_reward:
                self.best_reward = total_reward
                self.best_policy.update(policy)

        print(f'Best Reward during training: {self.best_reward}')

    def play(self, render_play=True):
        """Play one episode following ``best_policy`` and return its reward.

        States missing from ``best_policy`` fall back to a randomly sampled
        action (the sample is not stored).

        Args:
            render_play: When truthy, render the environment after the reset
                and after every step.

        Returns:
            The sum of the rewards collected during the episode.
        """
        state = self.env.reset()
        if render_play:
            self.env.render()

        total_reward = 0
        done = False
        while not done:
            if state in self.best_policy:
                action = self.best_policy[state]
            else:
                action = self.env.action_space.sample()

            state, reward, done, _ = self.env.step(action)
            if render_play:
                self.env.render()

            total_reward += reward

        return total_reward

def main():
    """Train a RandomAgent and a RandomPolicyAgent, then compare their play.

    Builds the ``ENV_NAME`` environment (wrapped for recording when
    ``SHOW_VIDEO`` is set), trains both agents for ``NUM_EPISODES`` episodes,
    then plays ``NUM_PLAY_EPISODES`` episodes with each and prints the summed
    reward. ``show_video()`` is called after every phase that was both
    rendered and recorded.
    """
    env = gym.make(ENV_NAME)
    if SHOW_VIDEO:
        # wrap_env must be *called*; assigning the function object itself
        # handed both agents a function instead of an environment.
        env = wrap_env(env)

    # Train the agents one after the other on the same environment.
    agents = []
    for label, agent_cls in (('RandomAgent', RandomAgent),
                             ('RandomPolicyAgent', RandomPolicyAgent)):
        agent = agent_cls(env)
        print(f'Training {label}')
        agent.train(NUM_EPISODES, RENDER_TRAINING)
        if RENDER_TRAINING and SHOW_VIDEO:
            show_video()
        agents.append((label, agent))

    # Evaluate each trained agent and report its total reward.
    for label, agent in agents:
        print(f'Playing with {label}')
        total_rewards = sum(
            agent.play(RENDER_PLAY) for _ in range(NUM_PLAY_EPISODES)
        )
        print(f'Reward from playing with {label}: {total_rewards}')
        if RENDER_PLAY and SHOW_VIDEO:
            show_video()

# Run the full train-and-play comparison when this cell executes.
main()



Training RandomAgent
Best reward during training: 1.0
Training RandomPolicyAgent
Best Reward during training: 1.0
Playing with RandomAgent
Reward from playing with RandomAgent: 27.0
Playing with RandomPolicyAgent
Reward from playing with RandomPolicyAgent: 311.0
