In [1]:
from ilqr import iLQR
import gym
import numpy as np
import pandas as pd
import time

from aprl.agents import MujocoFiniteDiffDynamicsPerformance
from aprl.envs.mujoco_costs import get_cost
from experiments.common import set_seeds, make_env, fit_ilqr, \
                               on_iteration, receding, multi_evaluate



Logging to /tmp/openai-2019-02-02-16-10-57-510401
Choosing the latest nvidia driver: /usr/lib/nvidia-396, among ['/usr/lib/nvidia-375', '/usr/lib/nvidia-396']
Choosing the latest nvidia driver: /usr/lib/nvidia-396, among ['/usr/lib/nvidia-375', '/usr/lib/nvidia-396']


In [2]:
# Fix the seed before anything else is constructed; the same value is passed
# on to `make_env` below.
seed = 42
set_seeds(seed)

# Build the task. `make_env` returns the environment together with `us_init`,
# the initial control sequence reused by the fitting cells further down.
env_name = 'InvertedDoublePendulum-v2'
env, us_init = make_env(env_name, seed=seed, horizon=100)

# Everything below is a dict keyed by dynamics-model name, so further models
# can be compared by adding entries to `dynamics` only.
dynamics = {'performance': MujocoFiniteDiffDynamicsPerformance(env)}

# Starting state of each model, as reported by the model itself.
x0s = {name: model.get_state() for name, model in dynamics.items()}

# One cost object per model, all looked up by the same environment name.
costs = {name: get_cost(env_name) for name in dynamics}

# One iLQR controller per model; the third argument is the length of the
# initial control sequence (presumably the planning horizon — confirm against
# the ilqr package).
ilqrs = {
    name: iLQR(model, costs[name], len(us_init))
    for name, model in dynamics.items()
}

In [3]:
# Fit the controllers in `ilqrs` from the start states in `x0s`, using
# `us_init` as the initial control sequence.
# NOTE(review): `xs` and `us` are presumably dicts keyed like `ilqrs` (the
# receding-horizon cell assigns into them by key, and the log below prints one
# "Fitting <key>" banner) — confirm against experiments.common.fit_ilqr.
xs, us = fit_ilqr(ilqrs, x0s, us_init)

dict_keys(['performance'])
*** Fitting performance ***
iteration 0 failed 1072.8584477859247 [-0.64029087  3.3499411   0.25855345  0.65627266  7.18869424  2.83578306] [-0.7842171]
iteration 1 failed 1072.8584477859247 [-0.64029087  3.3499411   0.25855345  0.65627266  7.18869424  2.83578306] [-0.7842171]
iteration 2 accepted 1059.9412438513739 [-0.66334038  3.32649992  0.27856225  0.57285335  6.84846271  3.8956121 ] [-0.7839354]
iteration 3 failed 1059.9412438513739 [-0.66334038  3.32649992  0.27856225  0.57285335  6.84846271  3.8956121 ] [-0.7839354]
iteration 4 accepted 915.3045732401388 [-0.59995386  3.0761087   0.20781475  0.04160346  3.4572538  11.10813061] [-0.73891616]
iteration 5 failed 915.3045732401388 [-0.59995386  3.0761087   0.20781475  0.04160346  3.4572538  11.10813061] [-0.73891616]
iteration 6 accepted 357.29716793786497 [ 0.05763148  2.88049626 -0.90645288 -0.08896498 -0.94533524 17.72479941] [-0.6733481]
iteration 7 failed 357.29716793786497 [ 0.05763148  2.88049626 -

# Receding horizon

In [4]:
# For every fitted controller, add a 'receding_<name>' variant to the shared
# result dicts. The variant reuses the same dynamics object and start state;
# only the state/control trajectories differ, as they come from `receding`.
for underlying, ilqr in ilqrs.items():
    k = 'receding_' + underlying
    start_state = x0s[underlying]

    # Alias rather than copy: both keys point at the same dynamics / state.
    dynamics[k] = dynamics[underlying]
    x0s[k] = start_state

    # NOTE(review): with step_size=5 the log prints one block every 5
    # iterations, each with 6 states and 5 controls — presumably 5 controls
    # are executed per re-plan; confirm against experiments.common.receding.
    replanned = receding(
        ilqr,
        start_state,
        us_init,
        seed=seed,
        horizon=500,
        step_size=5,
    )
    xs[k], us[k] = replanned

iteration 0 x = [[-2.51713244e-02 -3.13228967e-03  8.41541642e-02  2.78393152e-01
  -8.96038897e-02 -8.23645605e-02]
 [-2.30546428e-02 -2.73084678e-03  8.18630811e-02  1.44947338e-01
   1.69889282e-01 -3.75888164e-01]
 [-2.20716243e-02 -1.47207303e-04  7.71597407e-02  5.16305864e-02
   3.47088796e-01 -5.65405536e-01]
 [-2.18733970e-02  3.90881604e-03  7.09333390e-02 -1.20300173e-02
   4.64519595e-01 -6.80839525e-01]
 [-2.22015715e-02  8.92125159e-03  6.38169963e-02 -5.36618124e-02
   5.38462678e-01 -7.43584246e-01]
 [-2.28637554e-02  1.45145173e-02  5.62601789e-02 -7.88392976e-02
   5.80737757e-01 -7.69035389e-01]], u = [[-0.28724512]
 [-0.20120426]
 [-0.13727081]
 [-0.08946668]
 [-0.05347044]]
iteration 5 x = [[-0.02286376  0.01451452  0.05626018 -0.0788393   0.58073776 -0.76903539]
 [-0.02368409  0.02034934  0.04866416 -0.08529637  0.58679837 -0.75146374]
 [-0.02454028  0.02619815  0.04128686 -0.08601007  0.58353338 -0.7252756 ]
 [-0.02537827  0.03197454  0.03420614 -0.08165661  0.57

iteration 80 x = [[ 0.37626805 -0.08172781  0.03026837  0.91631649 -0.61362242  0.34579807]
 [ 0.38524574 -0.08759201  0.03344333  0.87929411 -0.55969142  0.29008515]
 [ 0.39386686 -0.09295435  0.03611693  0.84499427 -0.51320034  0.24541445]
 [ 0.40215038 -0.09787123  0.03837542  0.81176942 -0.4705586   0.20697265]
 [ 0.41010325 -0.10237247  0.04026901  0.77885978 -0.43003333  0.1723589 ]
 [ 0.41772703 -0.10647517  0.04182997  0.74594703 -0.39081547  0.14037307]], u = [[-0.09098531]
 [-0.08601793]
 [-0.08451343]
 [-0.08457507]
 [-0.08525875]]
iteration 85 x = [[ 0.41772703 -0.10647517  0.04182997  0.74594703 -0.39081547  0.14037307]
 [ 0.42501653 -0.11018108  0.0430694   0.71199786 -0.35064253  0.10798828]
 [ 0.43195862 -0.11347431  0.04397456  0.67646215 -0.30824715  0.07345022]
 [ 0.43854221 -0.11634165  0.04453457  0.64029109 -0.26542735  0.03888345]
 [ 0.44476378 -0.11878389  0.04475468  0.60405316 -0.22318919  0.00539661]
 [ 0.4506245  -0.12080981  0.04464862  0.56811576 -0.182126

iteration 155 x = [[ 0.61074729 -0.27052854  0.05947605  0.13586545 -0.87253849  0.66368346]
 [ 0.61203455 -0.2794335   0.06646365  0.12165815 -0.90907194  0.73505687]
 [ 0.6131241  -0.28860997  0.0740636   0.09633687 -0.92691694  0.78633285]
 [ 0.61384641 -0.29776006  0.08194229  0.04823434 -0.9039172   0.79107358]
 [ 0.61392272 -0.30638051  0.08952422 -0.03283069 -0.82111474  0.72727089]
 [ 0.61300853 -0.31384818  0.0960995  -0.14984912 -0.67341248  0.58987103]], u = [[-0.0660605 ]
 [-0.09213167]
 [-0.14488514]
 [-0.22126154]
 [-0.30502277]]
iteration 160 x = [[ 0.61300853 -0.31384818  0.0960995  -0.14984912 -0.67341248  0.58987103]
 [ 0.61082344 -0.31966633  0.10111849 -0.28702979 -0.49108001  0.41574954]
 [ 0.60724633 -0.32363888  0.10439094 -0.42828995 -0.30404693  0.24004649]
 [ 0.60233088 -0.32589266  0.10610202 -0.55474727 -0.14704922  0.10289142]
 [ 0.59630411 -0.32687215  0.10680198 -0.65058658 -0.04898485  0.03738737]
 [ 0.58939777 -0.32702227  0.10703024 -0.73067576  0.0189

iteration 235 x = [[-0.33731612 -0.2903752  -0.00627519 -0.22697223 -2.59512292  2.13817253]
 [-0.34089199 -0.31402203  0.01212576 -0.48731783 -2.13837303  1.54956797]
 [-0.34606298 -0.33513921  0.02728795 -0.54663322 -2.08679175  1.48612098]
 [-0.35092068 -0.35752471  0.04408106 -0.42523261 -2.38993202  1.87177711]
 [-0.3561064  -0.38002452  0.06112428 -0.6112537  -2.11347276  1.54348729]
 [-0.36002145 -0.40566019  0.08211103 -0.17345274 -3.00779385  2.64189952]], u = [[-0.61456202]
 [-0.16512525]
 [ 0.24959343]
 [-0.46397534]
 [ 1.00798695]]
iteration 240 x = [[-0.36002145 -0.40566019  0.08211103 -0.17345274 -3.00779385  2.64189952]
 [-0.35961634 -0.44003458  0.11365648  0.25208517 -3.85827976  3.64821113]
 [-0.35686309 -0.47942892  0.15104621  0.29806715 -4.0198162   3.82692646]
 [-0.35325033 -0.52110289  0.1908527   0.42336831 -4.31135495  4.12496373]
 [-0.35014318 -0.56287243  0.23075813  0.19922731 -4.04961031  3.87036463]
 [-0.34953713 -0.60179704  0.26816561 -0.0766718  -3.7434

iteration 315 x = [[-9.73704737e-01 -3.74463088e+00 -2.42063280e+00 -1.46313808e+00
  -1.10705100e+01  6.23293131e+00]
 [-9.89456227e-01 -3.85388428e+00 -2.34960259e+00 -1.68423705e+00
  -1.07671274e+01  7.96566886e+00]
 [-1.00186028e+00 -3.95462984e+00 -2.27513350e+00 -6.13782364e-01
  -9.22946207e+00  6.44001031e+00]
 [-1.00449166e+00 -4.04183466e+00 -2.21516248e+00 -7.55155001e-03
  -8.31585522e+00  5.75610589e+00]
 [-1.00320866e+00 -4.12216762e+00 -2.15745859e+00  2.18443385e-01
  -7.79265136e+00  5.88242259e+00]
 [-9.99982861e-01 -4.19776042e+00 -2.09815660e+00  4.24664127e-01
  -7.33518070e+00  5.96917438e+00]], u = [[-1.43044162]
 [-0.89352747]
 [ 0.02295803]
 [-0.02439513]
 [ 0.32464519]]
iteration 320 x = [[-0.99998286 -4.19776042 -2.0981566   0.42466413 -7.3351807   5.96917438]
 [-0.99492863 -4.26913029 -2.03778657  0.5845     -6.9460433   6.09756368]
 [-0.98861169 -4.33693424 -1.97560069  0.67755201 -6.62010567  6.33355836]
 [-0.98287852 -4.40212711 -1.90743822  0.46840134 -

iteration 385 x = [[ 8.19861403e-01 -6.35664665e+00  4.49565815e-03 -2.89206645e+00
   5.76261044e+00 -4.06655364e+00]
 [ 7.89761114e-01 -6.29669180e+00 -3.90543784e-02 -3.12753485e+00
   6.22757090e+00 -4.63772264e+00]
 [ 7.59674886e-01 -6.23698039e+00 -8.19249579e-02 -2.88920765e+00
   5.71533145e+00 -3.93617819e+00]
 [ 7.33131199e-01 -6.18469351e+00 -1.14902100e-01 -2.42015424e+00
   4.74824949e+00 -2.67334316e+00]
 [ 7.06642595e-01 -6.13258685e+00 -1.47142612e-01 -2.87576532e+00
   5.66512392e+00 -3.75279283e+00]
 [ 6.78323124e-01 -6.07682746e+00 -1.83174852e-01 -2.78807137e+00
   5.48871714e+00 -3.45618442e+00]], u = [[-0.50184745]
 [ 0.50455134]
 [ 1.04108921]
 [-3.89796928]
 [ 0.16946802]]
iteration 390 x = [[ 0.67832312 -6.07682746 -0.18317485 -2.78807137  5.48871714 -3.45618442]
 [ 0.64973054 -6.02057178 -0.21884396 -2.92914536  5.75793949 -3.66460425]
 [ 0.6221608  -5.96616389 -0.25141883 -2.58655557  5.13456352 -2.87415949]
 [ 0.59420234 -5.9110211  -0.28358675 -3.00172448  

iteration 460 x = [[ 0.77599179 -0.89799483 -0.04862613  0.59839478  6.84560381 -4.97756413]
 [ 0.77992073 -0.82760027 -0.10091291  0.1843672   7.24662818 -5.49739158]
 [ 0.77961357 -0.75279594 -0.15888854 -0.24931335  7.72736127 -6.11079559]
 [ 0.77486063 -0.67280103 -0.22331629 -0.70508587  8.28336249 -6.77927586]
 [ 0.76845043 -0.59252886 -0.28567724 -0.57175912  7.76028708 -5.67318745]
 [ 0.76446879 -0.51962123 -0.33363748 -0.21792114  6.81013383 -3.91279223]], u = [[-6.64324223]
 [-1.49488394]
 [-3.51589075]
 [ 0.53670771]
 [ 1.46367683]]
iteration 465 x = [[ 0.76446879 -0.51962123 -0.33363748 -0.21792114  6.81013383 -3.91279223]
 [ 0.76420856 -0.45649483 -0.36395953  0.17089035  5.80896457 -2.15841641]
 [ 0.7679717  -0.40352923 -0.37701674  0.58529208  4.78123983 -0.46582211]
 [ 0.77597297 -0.36090604 -0.37355287  1.01733305  3.74244026  1.14470015]
 [ 0.78835604 -0.32868732 -0.35438695  1.46073826  2.70139398  2.67650922]
 [ 0.80520934 -0.30687024 -0.32023079  1.91066684  1.6626

# Rollouts

In [5]:
# Evaluate the control sequences in `us` on `env`, one entry per key; the
# value displayed below is a table of rewards and lengths per key.
# NOTE(review): render=True appears to open a GLFW window (the output shows
# "Creating window glfw"), so this cell presumably needs a display — confirm
# before running headless.
multi_evaluate(env, dynamics, x0s, us, render=True)

performance
Creating window glfw
receding_performance


Unnamed: 0,rewards,lengths
performance,935.548815,100
receding_performance,2321.090845,249
