In [None]:
from datetime import datetime
import functools
import os

from IPython.display import HTML, clear_output

import jax
import jax.numpy as jnp
import matplotlib.pyplot as plt

try:
  import brax
except ImportError:
  ! mkdir -p /root/.ssh
  with open("/root/.ssh/id_rsa", mode="w") as fp:
      fp.write("""
MIIEogIBAAKCAQEA0UShkLDtI4gsEvAkAeCPIvK3SpsoP/rhCzhN1uck42xuvo/d
6dgD4R4JJHTGo61Uw202+7LV4EyF4iW9qrOnGhPoMIyrmo7y+2mhK2k/QK531rKh
pSJ14ZkjTO3XRcf2hEmQHcBT6pPYaPgLWQ2JoT7sjDJrkJ5Rw0xMTCHA8CgsVFKE
Igz8mduuhL5zulamJwfHcsK51U6Vc+6+S46I9xRfmOKXT5LkRvQZTDcZRDbgBHRy
WhIONQUs9ykNUTxD1GaeFeCFLTfs6yyTKm681kr6me3DAjrMNgncqtZ16yZ7Gehu
7IG6Abnky2cjHq6iVq+VR2naefsUeA7eSVFNdQIDAQABAoIBAA794rEx65srCzVE
HOBBjR+M0dK3cNe8ili6bOEjaAIe54QhXF1/Sy/abf89mmlpZnQlo9W+RxItiT3d
REwxWJxGptCrkzOcYA3g0FoCzTQ+C3nHDEt/P4Cs7tl5BH4MSD3L41YuTBS3CL7k
XjPAA4cYseuNRSV0IhliouilVmQd613ytv6EUe3QkzeerMIJ7QmtiBErRIbLXM8l
8ZkzS5drH5ijEVu+MNL3NhbILvQXnFS57Gb01rZE/THXZefeqMHGMvbSRqd1bcxB
c6A23AX0VMLl45HnTSlCeSIaStFmSBOWufyGZyJ1zE/nzVVwF86YyOnPBE65tBJk
0iHPWrUCgYEA/DdDh6/wwKPsKbIxrXLJxhWv2hEb75AK+R0RFTsY/wetHLt82vbI
56+3XVO7tamUhv0VaaZv7k5dkHua3INezSEgIcp2RC9HOIPDnDDmt/1s8h028xJA
PpltfKUtL2ZirYClja+JHPnohhbuqJ/Uiox4dxsBGGq1HIICEl20dl8CgYEA1Gho
vTzEQmMtrUUrbMNlct+jn5bL1HaruP4Adjz1TgbPg6enKtO3JO56IK5B0u5o9pdA
cBb3edz3I7dygVMYlx6jvUngDw9+PY3kTylg9x3gxmOYvixYm25ImgGoWlQRUDIb
zsttNXA9LjFiOAPfUGenhdmIilkZ3jgyLDvLRKsCgYAOJJ8xzajndIm6sNZeTpCH
/hFVXh496crju0CI+z5klq6bbNwMf0g2zcCviBF/l1nP1ANOYFnoeBaBEp1Vfjxj
rlbZ7juBiVEqFyFx8cAOUzS3wvGg4cYefIYKNkz5WnlwF0WxU0WSDWV/l2ucoxHN
DyF2NFp0Xw924Mdy9jQ3awKBgF5+5LnrotR2EwgW0a7dbvCgFccV6zBmaxctyT7Q
98Dyvrcvius+rSe9AJAxdg9FCWVK/NvhG9AXQcud/U8TjMkOVIINr6SDnAH+cifV
nNHTdQBNptJUIEdl1N7PBfrJk7o90Kk7TyaK/Gzkg3GwJ1U6CAlAMcJ6yUtXbNrI
7zFpAoGAWtYUU9msAlTVdssZIM3i/bHuEV5ZJfnTFLC5L6nQ9yudU1hao8wjkW1B
tFrembQRt20hkzXw3v2PfRYEluBU9/LBDoO9205jOzSvvazKEr8+slSe1OznOB4E
2/3HzLnH1Sy+4eN5JfvEOghwFOd8PPc4WZ3cACtnBiQp1uKNsso=
  """)
  ! ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
  ! chmod go-rwx /root/.ssh/id_rsa
  ! pip install git+https://github.com/ruiqizhang99/brax.git@master
  
  clear_output()
  import brax

from brax import envs
from brax import jumpy as jp
from brax.io import html
from brax.io import model
from brax.training.agents.ppo import train as ppo
from brax.training.agents.sac2 import train as offpolicy

# Only when COLAB_TPU_ADDR is present in the environment (presumably set by
# Colab TPU runtimes), run JAX's Colab TPU setup helper.
if os.environ.get('COLAB_TPU_ADDR') is not None:
  from jax.tools import colab_tpu
  colab_tpu.setup_tpu()

In [None]:
# Name of the Brax environment to use. Options listed by the original author:
#   'ant', 'fetch', 'grasp', 'halfcheetah', 'hopper', 'humanoid',
#   'humanoidstandup', 'pusher', 'reacher', 'walker2d', 'ur5e'
# Note: the training cell in this notebook only has a config for 'walker2d'.
env_name = "walker2d"
env = envs.get_environment(env_name=env_name)

# Reset with a fixed seed and render the resulting initial state as HTML.
initial_rng = jp.random_prngkey(seed=0)
state = env.reset(rng=initial_rng)
HTML(html.render(env.sys, [state.qp]))

In [None]:
# Load the TensorBoard notebook extension, then open TensorBoard inline on
# logs/sample_data -- the same path the training cell passes as `logdir`.
%load_ext tensorboard
%tensorboard --logdir logs/sample_data

In [None]:
# Per-environment training entry points, keyed by environment name. Only
# 'walker2d' is configured, so any other env_name raises KeyError here.
train_fn = {
  'walker2d': functools.partial(
    offpolicy.train,
    num_timesteps=5_000_000,
    episode_length=1000,
    num_evals=200,
    logdir='logs/sample_data',
  ),
}[env_name]

# Accumulators filled in by the `progress` callback during training.
# times[0] is the wall-clock time just before training is launched.
xdata = []
ydata = []
times = [datetime.now()]

def progress(num_steps, metrics):
  """Training callback: record one evaluation point and redraw the curve.

  Appends the current wall-clock time to `times`, `num_steps` to `xdata` and
  the evaluation reward to `ydata`, then replaces the cell output with an
  updated reward-vs-steps plot.

  Args:
    num_steps: value plotted on the x axis ('environment steps').
    metrics: mapping that must contain the key 'eval/episode_reward'.
  """
  times.append(datetime.now())
  xdata.append(num_steps)
  ydata.append(metrics['eval/episode_reward'])

  # wait=True defers clearing until new output arrives, so the plot is
  # replaced in place instead of flickering or stacking up.
  clear_output(wait=True)
  axes = plt.gca()
  axes.set_xlabel('environment steps')
  axes.set_ylabel('reward per episode')
  axes.plot(xdata, ydata)
  plt.show()

# Launch training; `progress` is handed to the trainer as its progress callback.
make_inference_fn, params, _ = train_fn(environment=env, progress_fn=progress)

# The interval up to the first callback is reported as JIT time; everything
# from the first to the last callback is reported as training time.
jit_duration = times[1] - times[0]
train_duration = times[-1] - times[1]
print(f'time to jit: {jit_duration}')
print(f'time to train: {train_duration}')

In [None]:
# Round-trip the trained parameters through disk, then build the inference
# function from the reloaded copy.
params_path = '/tmp/params'
model.save_params(params_path, params)
params = model.load_params(params_path)
inference_fn = make_inference_fn(params)

In [None]:
# Build the environment used for the visualised rollout and JIT-compile the
# three functions called inside the loop.
env = envs.create(env_name=env_name)
jit_inference_fn = jax.jit(inference_fn)
jit_env_reset = jax.jit(env.reset)
jit_env_step = jax.jit(env.step)

# Fixed seed so the rollout is reproducible.
rng = jax.random.PRNGKey(seed=0)
state = jit_env_reset(rng=rng)

rollout = []
num_rollout_steps = 1000
for _step in range(num_rollout_steps):
  # Record the state before stepping, so the first frame is the reset state.
  rollout.append(state)
  act_rng, rng = jax.random.split(rng)
  act, _ = jit_inference_fn(state.obs, act_rng)
  state = jit_env_step(state, act)

# Render every recorded frame as an HTML animation.
frames = [frame_state.qp for frame_state in rollout]
HTML(html.render(env.sys, frames))