<a href="https://colab.research.google.com/github/JHyunjun/SNU/blob/main/OpenAI_setup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Basic Setup for Using OpenAI Gym with Colab

In [21]:
# For colab monitor
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
!apt-get update > /dev/null 2>&1
!apt-get install cmake > /dev/null 2>&1
!pip install --upgrade setuptools 2>&1
!pip install ez_setup > /dev/null 2>&1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# 0. Connect Google Drive to Colab

In [22]:
from google.colab import drive
# Mount Google Drive at /content/drive so later cells can reach files stored there.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
# Switch the working directory to the course folder on the mounted Drive and
# list its contents.
%cd /content/drive/MyDrive/Colab Notebooks/snu/8주_RL/강의자료/실습강의/day1/setting
!ls

/content/drive/MyDrive/Colab Notebooks/snu/8주_RL/강의자료/실습강의/day1/setting
day1_setting.pdf  Humanoid_expert.py  setup.ipynb  torch_test.py


# 1. Introduction to Gym environment

## 1.1 Prerequisites

In [24]:
import gym
from gym.wrappers import Monitor
import glob
import io
import base64
from IPython.display import HTML
from pyvirtualdisplay import Display
from IPython import display as ipythondisplay

# Create and start a pyvirtualdisplay Display (visible=0, 1400x900).
# NOTE(review): presumably required so env.render() works without a physical
# screen in Colab — confirm.
display = Display(visible=0, size=(1400, 900))
display.start()

"""
Utility functions to enable video recording of gym environment 
and displaying it.
To enable video, just do "env = wrap_env(env)"
"""
def show_video():
    """Display a recorded episode video inline in the notebook.

    Looks for ``*.mp4`` files in the ``video`` directory (relative to the
    current working directory), base64-encodes the first one in sorted
    order and shows it through an HTML ``<video>`` element. Prints
    "Could not find video" when no file matches.
    """
    # glob returns matches in arbitrary order; sort so the pick is deterministic.
    mp4list = sorted(glob.glob('video/*.mp4'))
    if not mp4list:
        print("Could not find video")
        return

    # Read-only binary mode is all that is needed, and the context manager
    # closes the file handle right away instead of leaving it to the
    # garbage collector.
    with open(mp4list[0], 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())

    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                  loop controls style="height: 400px;">
                  <source src="data:video/mp4;base64,{0}" type="video/mp4" />
              </video>'''.format(encoded.decode('ascii'))))
    

def wrap_env(env):
    """Return ``env`` wrapped in ``Monitor``, targeting the ``./video`` directory.

    ``force=True`` is passed through to ``Monitor``.
    NOTE(review): presumably this lets Monitor overwrite recordings left by
    an earlier run — confirm against the installed gym version.
    """
    return Monitor(env, './video', force=True)

Now we can render Gym environments!

If you are running this in a local Jupyter notebook rather than Colab, set

```
colab = False
```

In [25]:
# Set to False when running outside Colab: the cells below only wrap the
# environment with wrap_env() and call show_video() when this flag is True.
colab = True

## 1.2 Basic environments

In [26]:
import gym

# TODO : Render classic control problems below
# CartPole-v1
# MountainCar-v0
# Pendulum-v0
# Visit https://gym.openai.com/envs/#classic_control for more environments!

env_id = 'Acrobot-v1'

# Build the environment from its ID; in Colab, wrap it so the episode is recorded.
env = gym.make(env_id)
if colab:
    env = wrap_env(env)

try:
    # Reset to obtain the initial observation.
    observation = env.reset()

    # Roll out a random policy for at most 1000 steps.
    for t in range(1000):
        env.render()

        # Sample a random action from the environment's action space.
        action = env.action_space.sample()

        # Advance the simulation by one step.
        observation, reward, done, info = env.step(action)

        # done signals the terminal condition.
        if done:
            break
finally:
    # Close the environment even if the rollout raised, so it is never left open.
    env.close()

# Show the recorded episode.
if colab:
    show_video()

In [27]:
# More complex problems
!pip3 install box2d-py
!pip3 install gym[Box_2D]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [28]:
import gym

# TODO : Render Box2d environments below
# BipedalWalker-v3
# LunarLander-v2
# CarRacing-v0
# Visit https://gym.openai.com/envs/#box2d for more environments!

env_id = 'CarRacing-v0'

# Build the environment from its ID; in Colab, wrap it so the episode is recorded.
env = gym.make(env_id)
if colab:
    env = wrap_env(env)

try:
    # Reset to obtain the initial observation.
    observation = env.reset()

    # Roll out a random policy for at most 1000 steps.
    for t in range(1000):
        env.render()

        # Sample a random action from the environment's action space.
        action = env.action_space.sample()

        # Advance the simulation by one step.
        observation, reward, done, info = env.step(action)

        # done signals the terminal condition.
        if done:
            break
finally:
    # Close the environment even if the rollout raised, so it is never left open.
    env.close()

# Show the recorded episode.
if colab:
    show_video()



Track generation: 985..1241 -> 256-tiles track


## 1.3 Atari environment

In [29]:
# Import the Atari ROM files into atari_py; the ROMs are read from a folder
# on the mounted Google Drive.
!python -m atari_py.import_roms /content/drive/MyDrive/Colab\ Notebooks/rl-hyundai/ROMS

In [35]:
import gym

# TODO : Render below environments!
# Atlantis-ram-v0
# Breakout-ram-v0
# ...
# Visit https://gym.openai.com/envs/#atari for more environments!

env_id = 'Breakout-ram-v0'

# Build the environment from its ID; in Colab, wrap it so the episode is recorded.
env = gym.make(env_id)
if colab:
    env = wrap_env(env)

try:
    # Reset to obtain the initial observation.
    observation = env.reset()

    # Roll out a random policy for at most 1000 steps.
    for t in range(1000):
        env.render()

        # Sample a random action from the environment's action space.
        action = env.action_space.sample()

        # Advance the simulation by one step.
        observation, reward, done, info = env.step(action)

        # done signals the terminal condition.
        if done:
            break
finally:
    # Close the environment even if the rollout raised, so it is never left open.
    env.close()

# Show the recorded episode.
if colab:
    show_video()

Exception: ignored

## 1.4 Robotics environment

In [31]:
# Install the pybullet robot simulator; the next cell imports pybullet_envs
# for its Bullet environments.
!pip install pybullet

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pybullet
  Downloading pybullet-3.2.5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (91.7 MB)
[K     |████████████████████████████████| 91.7 MB 22 kB/s 
[?25hInstalling collected packages: pybullet
Successfully installed pybullet-3.2.5


In [37]:
import pybullet_envs

# TODO : Render below environments!
# HalfCheetahBulletEnv-v0
# HopperBulletEnv-v0
# HumanoidBulletEnv-v0
# Walker2DBulletEnv-v0

env_id = 'HumanoidBulletEnv-v0'

# Build the environment from its ID exactly once (a second gym.make would
# leave the first instance unclosed); in Colab, wrap it so the episode is
# recorded.
env = gym.make(env_id)
if colab:
    env = wrap_env(env)

try:
    # Reset to obtain the initial observation.
    observation = env.reset()

    # Roll out a random policy for at most 1000 steps.
    for t in range(1000):
        env.render()

        # Sample a random action from the environment's action space.
        action = env.action_space.sample()

        # Advance the simulation by one step.
        observation, reward, done, info = env.step(action)

        # done signals the terminal condition.
        if done:
            break
finally:
    # Close the environment even if the rollout raised, so it is never left open.
    env.close()

# Show the recorded episode.
if colab:
    show_video()

# 2. Run pretrained RL agent within Gym environment

In [46]:
import pybullet_envs
from Humanoid_expert import * # Pretrained model from https://github.com/benelot/pybullet-gym

env_id = 'HumanoidBulletEnv-v0'

# Build the environment from its ID; in Colab, wrap it so the episode is recorded.
env = gym.make(env_id)
if colab:
    env = wrap_env(env)

try:
    # Reset to obtain the initial observation.
    observation = env.reset()

    # Instantiate the trained policy from the environment's observation and
    # action spaces.
    pi = SmallReactivePolicy(env.observation_space, env.action_space)

    # Run the policy for at most 200 steps.
    for t in range(200):
        env.render()

        # TODO : use policy pi to get action!
        # Hint : policy is mapping from state(observation) to action : pi(a|s)
        action = pi.act(observation)

        # Advance the simulation by one step.
        observation, reward, done, info = env.step(action)

        # done signals the terminal condition.
        if done:
            break
finally:
    # Close the environment even if the rollout raised, so it is never left open.
    env.close()

# Show the recorded episode.
if colab:
    show_video()

# 3. Torch gradient calculation test

In [42]:
import torch

# Three independent random 5x5 leaf tensors that track gradients,
# drawn in the order x, y, z.
x, y, z = (torch.randn((5, 5), requires_grad=True) for _ in range(3))

# Reduce the element-wise product to a scalar so backward() needs no argument:
# w = sum((x + y) * z)
v = (x + y) * z
w = v.sum()
w.backward()

# dw/dx = z, so the two tensors printed here should hold the same values.
print(z, x.grad, sep='\n')
print('')

# dw/dz = x + y, so these two should match as well.
print(x + y, z.grad, sep='\n')

tensor([[ 1.3702, -1.1029, -0.6243,  0.4473, -0.8288],
        [-0.9376,  0.0385,  0.5676,  0.1163,  1.4815],
        [-1.9428,  0.2366, -1.4693, -1.0240,  0.8580],
        [-0.9766,  0.0686,  1.1936,  0.7849, -0.5743],
        [ 0.5805, -1.2205, -2.2695, -0.8722, -1.4596]], requires_grad=True)
tensor([[ 1.3702, -1.1029, -0.6243,  0.4473, -0.8288],
        [-0.9376,  0.0385,  0.5676,  0.1163,  1.4815],
        [-1.9428,  0.2366, -1.4693, -1.0240,  0.8580],
        [-0.9766,  0.0686,  1.1936,  0.7849, -0.5743],
        [ 0.5805, -1.2205, -2.2695, -0.8722, -1.4596]])

tensor([[-1.6149, -1.2615,  0.4814,  0.2395,  0.8732],
        [ 0.2892, -1.7163,  1.1373, -1.5742, -0.5649],
        [ 1.7739, -0.0674, -2.3988, -1.8158, -0.2015],
        [ 0.0166,  1.9768,  1.7354,  0.9414,  0.7274],
        [-0.7277,  0.2272,  0.0436, -1.0659,  0.0740]], grad_fn=<AddBackward0>)
tensor([[-1.6149, -1.2615,  0.4814,  0.2395,  0.8732],
        [ 0.2892, -1.7163,  1.1373, -1.5742, -0.5649],
        [ 1.7739,