In [1]:
from ilqr import iLQR
import gym
import numpy as np
import pandas as pd
import time

from aprl.agents import MujocoFiniteDiffDynamicsPerformance
from aprl.envs.mujoco_costs import get_cost
from experiments.common import set_seeds, make_env, fit_ilqr, \
                               on_iteration, receding, multi_evaluate



Logging to /tmp/openai-2019-02-02-10-08-39-236921
Choosing the latest nvidia driver: /usr/lib/nvidia-396, among ['/usr/lib/nvidia-375', '/usr/lib/nvidia-396']
Choosing the latest nvidia driver: /usr/lib/nvidia-396, among ['/usr/lib/nvidia-375', '/usr/lib/nvidia-396']


In [2]:
# Fix the seed first so that environment construction below happens after
# set_seeds has run.
seed = 42
set_seeds(seed)

# make_env returns the environment together with an initial control
# sequence; both are reused by every later cell.
env_name = 'Reacher-v2'
env, us_init = make_env(env_name, seed=seed)

# Every container below is a dict keyed by dynamics-model name, so adding
# another entry to `dynamics` automatically gets its own start state, cost
# and controller.
dynamics = {'performance': MujocoFiniteDiffDynamicsPerformance(env)}
x0s = {name: model.get_state() for name, model in dynamics.items()}
costs = {name: get_cost(env_name) for name in dynamics}

# Third positional argument to iLQR: the number of entries in us_init
# (presumably the planning horizon -- confirm against the ilqr package).
horizon = len(us_init)
ilqrs = {
    name: iLQR(model, costs[name], horizon)
    for name, model in dynamics.items()
}

In [3]:
# Hand all controllers, their initial states and the shared initial control
# sequence to fit_ilqr. The returned `xs` and `us` are keyed containers: the
# receding-horizon cell below stores additional entries in them under
# 'receding_<key>'.
# NOTE(review): presumably these are the fitted state / control trajectories
# per controller -- confirm in experiments.common.fit_ilqr.
xs, us = fit_ilqr(ilqrs, x0s, us_init)

dict_keys(['performance'])
*** Fitting performance ***
iteration -1 converged 39.757191899793796 [  0.07461718   0.78022181   0.02243766   0.07369059  -0.07322035
 -11.69088806   0.           0.        ] [ 0.65788007 -0.99060905]
iteration 0 accepted 11.79790471048815 [ -0.16270258   1.01183255   0.02243766   0.07369059  -0.06253441
 -11.28832474   0.           0.        ] [-0.00395429 -0.4083807 ]
iteration 1 accepted 10.391330789058037 [ -0.37362327   1.33038224   0.02243766   0.07369059  -0.05034111
 -10.55055495   0.           0.        ] [-0.00598577 -0.3689992 ]
iteration 2 accepted 8.910915424005065 [-0.35255453  1.72846764  0.02243766  0.07369059 -0.17105004 -7.75091353
  0.          0.        ] [ 0.01512192 -0.34990373]
iteration 3 accepted 7.259855685710237 [ 0.53500768  3.0174724   0.02243766  0.07369059  1.16668763 -0.30864271
  0.          0.        ] [-0.02086487 -0.11770818]
iteration 4 accepted 7.215047951255387 [-0.70304271  1.79693105  0.02243766  0.07369059 -0.614801

# Receding horizon

In [4]:
# For each fitted controller, add a second entry under 'receding_<name>' to
# every per-model dict so the evaluation cell treats it as its own row.
for name, controller in ilqrs.items():
    receding_key = 'receding_' + name

    # The receding entry shares the dynamics object and start state of the
    # controller it is derived from (same objects, not copies).
    start_state = x0s[name]
    dynamics[receding_key] = dynamics[name]
    x0s[receding_key] = start_state

    # receding() returns a (states, controls) pair for this controller.
    trajectory = receding(env, controller, start_state, us_init)
    xs[receding_key], us[receding_key] = trajectory

iteration 0 r = -0.19885275929869947, x = [[-0.02517132 -0.00313229  0.02243766  0.07369059 -0.00359075 -0.00120678
   0.          0.        ]
 [-0.02562071  0.000898    0.02243766  0.07369059 -0.0861478   0.80592031
   0.          0.        ]], u = [[-0.04139134  0.40563232]]
iteration 1 r = -0.18194700705423872, x = [[-2.56207070e-02  8.98003214e-04  2.24376640e-02  7.36905865e-02
  -8.61478022e-02  8.05920312e-01  0.00000000e+00  0.00000000e+00]
 [-2.69275494e-02  1.27561750e-02  2.24376640e-02  7.36905865e-02
  -1.75072123e-01  1.56444999e+00  0.00000000e+00  0.00000000e+00]], u = [[-0.0450168  0.3852421]]
iteration 2 r = -0.17161547530052002, x = [[-0.02692755  0.01275618  0.02243766  0.07369059 -0.17507212  1.56444999
   0.          0.        ]
 [-0.02917178  0.03193405  0.02243766  0.07369059 -0.27360869  2.26994945
   0.          0.        ]], u = [[-0.05030746  0.36238038]]
iteration 3 r = -0.1551853714093849, x = [[-0.02917178  0.03193405  0.02243766  0.07369059 -0.27360869  

iteration 30 r = -0.024104450056484022, x = [[-0.29022864  1.65590863  0.02243766  0.07369059 -0.26349378  5.52942181
   0.          0.        ]
 [-0.29204757  1.71009481  0.02243766  0.07369059 -0.10056392  5.30818326
   0.          0.        ]], u = [[ 0.08056087 -0.08353319]]
iteration 31 r = -0.022054259264049664, x = [[-0.29204757  1.71009481  0.02243766  0.07369059 -0.10056392  5.30818326
   0.          0.        ]
 [-0.2922164   1.76206051  0.02243766  0.07369059  0.06651997  5.0853274
   0.          0.        ]], u = [[ 0.08346432 -0.08545292]]
iteration 32 r = -0.0245226021291908, x = [[-0.2922164   1.76206051  0.02243766  0.07369059  0.06651997  5.0853274
   0.          0.        ]
 [-0.29075129  1.81177409  0.02243766  0.07369059  0.2262361   4.85776804
   0.          0.        ]], u = [[ 0.08059532 -0.08893248]]
iteration 33 r = -0.028780969640028256, x = [[-0.29075129  1.81177409  0.02243766  0.07369059  0.2262361   4.85776804
   0.          0.        ]
 [-0.28770563  1.85

# Rollouts

In [5]:
# Evaluate every entry of `dynamics` (the plain fit and its 'receding_'
# counterpart) using the matching entries of `x0s` and `us`. Kept as the
# cell's last expression so the resulting table (one row per key, with
# `rewards` and `lengths` columns) is displayed.
multi_evaluate(env, dynamics, x0s, us)

performance
receding_performance


Unnamed: 0,rewards,lengths
performance,-6.763944,50
receding_performance,-7.486454,50
