## Basic Setups to use OpenAI Gym with Colab

In [None]:
# For colab monitor
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
!apt-get update > /dev/null 2>&1
!apt-get install cmake > /dev/null 2>&1
!pip install --upgrade setuptools 2>&1
!pip install ez_setup > /dev/null 2>&1



# 0. Connect Google Drive to Colab

In [None]:
# Mount Google Drive at /content/drive so that later cells can reach the
# course files stored under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Make the day1 course folder on the mounted Drive the working directory and
# list its contents. Later cells rely on this location: they use the relative
# path 'video/' and import Humanoid_expert from here.
%cd /content/drive/MyDrive/Colab\ Notebooks/rl-hyundai/day1
!ls

/content/drive/MyDrive/Colab Notebooks/rl-hyundai/day1
Humanoid_expert.py	__pycache__  tabular_mdp    video
HumanoidPyBulletEnv.py	setup.ipynb  torch_test.py


# 1. Introduction to Gym environment

## 1.1 Prerequisites

In [None]:
import gym
from gym.wrappers import Monitor
import glob
import io
import base64
from IPython.display import HTML
from pyvirtualdisplay import Display
from IPython import display as ipythondisplay

# Start a non-visible 1400x900 virtual display.
# NOTE(review): presumably required so env.render() works on a headless
# Colab VM (xvfb is installed in the setup cell) — confirm.
display = Display(visible=0, size=(1400, 900))
display.start()

"""
Utility functions to enable video recording of gym environment 
and displaying it.
To enable video, just do "env = wrap_env(env)"
"""
def show_video():
    """Embed a recorded video from the ``video`` directory in the cell output.

    Searches ``video/*.mp4`` relative to the current working directory. If at
    least one file matches, the first match returned by ``glob`` is read,
    base64-encoded and displayed as an inline HTML ``<video>`` element.
    If nothing matches, prints ``Could not find video``.

    Returns:
        None
    """
    mp4list = glob.glob('video/*.mp4')
    if not mp4list:
        print("Could not find video")
        return

    mp4 = mp4list[0]
    # Open read-only ('rb' instead of 'r+b': no write permission needed) and
    # let the context manager close the handle as soon as the bytes are read.
    with io.open(mp4, 'rb') as video_file:
        video = video_file.read()
    encoded = base64.b64encode(video)
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                  loop controls style="height: 400px;">
                  <source src="data:video/mp4;base64,{0}" type="video/mp4" />
              </video>'''.format(encoded.decode('ascii'))))
    

def wrap_env(env):
    """Return ``env`` wrapped in a gym ``Monitor`` that writes to ``./video``.

    Args:
        env: The gym environment to wrap.

    Returns:
        The ``Monitor``-wrapped environment.
    """
    # NOTE(review): force=True presumably lets Monitor reuse a non-empty
    # ./video directory from a previous run — confirm against the gym docs.
    return Monitor(env, './video', force=True)

Now we can render Gym environments!

If you are running this notebook in a local Jupyter environment instead of Colab, set

```
colab = False
```

In [None]:
# Colab switch: when True, the cells below wrap each environment with
# wrap_env() and call show_video() after the rollout.
colab = True

## 1.2 Basic environments

In [None]:
import gym

# TODO : Render classic control problems below
# CartPole-v1
# MountainCar-v0
# Pendulum-v0
# Visit https://gym.openai.com/envs/#classic_control for more environments!

env_id = 'Acrobot-v1'

# Build environment with env ID!
env = gym.make(env_id)
if colab:
    env = wrap_env(env)

try:
    # Initialize environment
    observation = env.reset()

    # Roll out at most 1000 steps with a random policy
    for t in range(1000):
        env.render()

        # Randomly sample action from environment
        action = env.action_space.sample()

        # Simulate 1 step
        observation, reward, done, info = env.step(action)

        # done is used to check terminal condition
        if done:
            break
finally:
    # Close environment even when reset/render/step raises, so the
    # environment (and its wrapper, if any) is not left open.
    env.close()

# Render environment
if colab:
    show_video()

In [None]:
# More complex problems
!pip3 install box2d-py
!pip3 install gym[Box_2D]



In [None]:
import gym

# TODO : Render Box2d environments below
# BipedalWalker-v3
# LunarLander-v2
# CarRacing-v0
# Visit https://gym.openai.com/envs/#box2d for more environments!

env_id = 'CarRacing-v0'

# Build environment with env ID!
env = gym.make(env_id)
if colab:
    env = wrap_env(env)

try:
    # Initialize environment
    observation = env.reset()

    # Roll out at most 1000 steps with a random policy
    for t in range(1000):
        env.render()

        # Randomly sample action from environment
        action = env.action_space.sample()

        # Simulate 1 step
        observation, reward, done, info = env.step(action)

        # done is used to check terminal condition
        if done:
            break
finally:
    # Close environment even when reset/render/step raises, so the
    # environment (and its wrapper, if any) is not left open.
    env.close()

# Render environment
if colab:
    show_video()



Track generation: 1156..1449 -> 293-tiles track


## 1.3 Atari environment

In [None]:
# Import the Atari ROM files stored in the ROMS folder on the mounted
# Google Drive into atari_py.
!python -m atari_py.import_roms /content/drive/MyDrive/Colab\ Notebooks/rl-hyundai/ROMS

copying koolaid.bin from /content/drive/MyDrive/Colab Notebooks/rl-hyundai/ROMS/Kool-Aid Man (Kool Aid Pitcher Man) (1983) (M Network, Stephen Tatsumi, Jane Terjung - Kool Aid) (MT4648) ~.bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/koolaid.bin
copying trondead.bin from /content/drive/MyDrive/Colab Notebooks/rl-hyundai/ROMS/TRON - Deadly Discs (TRON Joystick) (1983) (M Network - INTV - APh Technological Consulting, Jeff Ronne, Brett Stutz) (MT5662) ~.bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/trondead.bin
copying robotank.bin from /content/drive/MyDrive/Colab Notebooks/rl-hyundai/ROMS/Robot Tank (Robotank) (1983) (Activision, Alan Miller) (AZ-028, AG-028-04) ~.bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/robotank.bin
copying hero.bin from /content/drive/MyDrive/Colab Notebooks/rl-hyundai/ROMS/H.E.R.O. (1984) (Activision, John Van Ryzin) (AZ-036-04) ~.bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/hero.b

In [None]:
import gym

# TODO : Render below environments!
# Atlantis-ram-v0
# Breakout-ram-v0
# ...
# Visit https://gym.openai.com/envs/#atari for more environments!

env_id = 'Breakout-ram-v0'

# Build environment with env ID!
env = gym.make(env_id)
if colab:
    env = wrap_env(env)

try:
    # Initialize environment
    observation = env.reset()

    # Roll out at most 1000 steps with a random policy
    for t in range(1000):
        env.render()

        # Randomly sample action from environment
        action = env.action_space.sample()

        # Simulate 1 step
        observation, reward, done, info = env.step(action)

        # done is used to check terminal condition
        if done:
            break
finally:
    # Close environment even when reset/render/step raises, so the
    # environment (and its wrapper, if any) is not left open.
    env.close()

# Render environment
if colab:
    show_video()

## 1.4 Robotics environment

In [None]:
# Install the PyBullet robot simulator; the next cell imports pybullet_envs,
# which registers the *BulletEnv-v0 environment IDs used there.
!pip install pybullet



In [None]:
import gym
import pybullet_envs  # imported for its side effect of registering the Bullet envs with gym

# TODO : Render below environments!
# HalfCheetahBulletEnv-v0
# HopperBulletEnv-v0
# HumanoidBulletEnv-v0
# Walker2DBulletEnv-v0

env_id = 'HumanoidBulletEnv-v0'

# Build environment with env ID!
# The environment is created exactly once: creating and wrapping it twice
# leaves the first instance open and points two recorders at ./video.
env = gym.make(env_id)
if colab:
    env = wrap_env(env)

try:
    # Initialize environment
    observation = env.reset()

    # Roll out at most 1000 steps with a random policy
    for t in range(1000):
        env.render()

        # Randomly sample action from environment
        action = env.action_space.sample()

        # Simulate 1 step
        observation, reward, done, info = env.step(action)

        # done is used to check terminal condition
        if done:
            break
finally:
    # Close environment even when reset/render/step raises, so the
    # environment (and its wrapper, if any) is not left open.
    env.close()

# Render environment
if colab:
    show_video()



# 2. Run pretrained RL agent within Gym environment

In [None]:
import gym
import pybullet_envs  # imported for its side effect of registering the Bullet envs with gym
# Pretrained model from https://github.com/benelot/pybullet-gym
# Explicit import instead of `import *` so it is clear where the policy class comes from.
from Humanoid_expert import SmallReactivePolicy

env_id = 'HumanoidBulletEnv-v0'

# Build environment with env ID!
# The environment is created exactly once: creating and wrapping it twice
# leaves the first instance open and points two recorders at ./video.
env = gym.make(env_id)
if colab:
    env = wrap_env(env)

try:
    # Initialize environment
    observation = env.reset()

    # Call trained policy!
    pi = SmallReactivePolicy(env.observation_space, env.action_space)

    # Roll out at most 1000 steps with the pretrained policy
    for t in range(1000):
        env.render()

        # Use policy pi to get the action for the current observation.
        # Hint : policy is mapping from state(observation) to action : pi(a|s)
        # NOTE(review): assumes SmallReactivePolicy.act(observation) returns an
        # action accepted by env.step — confirm against Humanoid_expert.py.
        action = pi.act(observation)

        # Simulate 1 step
        observation, reward, done, info = env.step(action)

        # done is used to check terminal condition
        if done:
            break
finally:
    # Close environment even when reset/render/step raises, so the
    # environment (and its wrapper, if any) is not left open.
    env.close()

# Render environment
if colab:
    show_video()



# 3. Torch gradient calculation test

In [None]:
import torch

# Three independent 5x5 leaf tensors that track gradients
x = torch.randn((5, 5), requires_grad=True)
y = torch.randn((5, 5), requires_grad=True)
z = torch.randn((5, 5), requires_grad=True)

v = (x + y) * z
w = torch.sum(v)

# w = sum((x + y) * z) is a scalar, so backward() needs no gradient argument
w.backward()

# dw / dx  = z
print(z)
print(x.grad)
print('')

# dw / dz  = x + y
print(x+y)
print(z.grad)

# Verify the analytic gradients programmatically instead of only comparing
# the printed tensors by eye.
assert torch.allclose(x.grad, z.detach())
assert torch.allclose(z.grad, (x + y).detach())

tensor([[ 0.3269,  0.6287, -0.2850, -0.5911,  0.7632],
        [ 0.1488, -1.4184, -0.2215,  0.5258, -0.1353],
        [ 0.3123, -0.0696,  2.1284, -0.3167,  1.4689],
        [-1.2023, -1.6358, -0.2142,  1.5430, -0.8675],
        [ 1.2429, -0.0904,  1.7588, -1.4575, -0.6154]], requires_grad=True)
tensor([[ 0.3269,  0.6287, -0.2850, -0.5911,  0.7632],
        [ 0.1488, -1.4184, -0.2215,  0.5258, -0.1353],
        [ 0.3123, -0.0696,  2.1284, -0.3167,  1.4689],
        [-1.2023, -1.6358, -0.2142,  1.5430, -0.8675],
        [ 1.2429, -0.0904,  1.7588, -1.4575, -0.6154]])

tensor([[ 0.6599, -1.3759,  0.9103, -2.0321, -1.6221],
        [-0.4255,  1.9292, -0.0686, -0.7171, -0.9385],
        [-1.6371,  0.9145, -1.7995,  0.1375, -3.2374],
        [ 1.5805,  0.8429,  1.2803,  0.0632,  0.4840],
        [ 3.4218,  0.2130,  0.8353,  0.4022, -0.8132]], grad_fn=<AddBackward0>)
tensor([[ 0.6599, -1.3759,  0.9103, -2.0321, -1.6221],
        [-0.4255,  1.9292, -0.0686, -0.7171, -0.9385],
        [-1.6371,