In [1]:
import crocoddyl
import mujoco
import mujoco.viewer
import mim_solvers
import numpy as np
import matplotlib.pyplot as plt
import hppfcl
import time
from os.path import dirname, join, abspath
np.set_printoptions(precision=4, linewidth=180)

import pin_utils, mpc_utils
from Kuka_utils import *
from Kuka_model import *
from Kuka_Mujoco_utils import *
from IRL_solver import IRL_Crocoddyl
import pinocchio as pin
from pinocchio.visualize import MeshcatVisualizer

from mim_robots.pybullet.env import BulletEnvWithGround
from mim_robots.robot_loader import load_bullet_wrapper, load_mujoco_model, get_robot_list, load_pinocchio_wrapper
from mim_robots.robot_list import MiM_Robots
from numpy.linalg import norm, solve
from scipy.spatial.transform import Rotation as R
from scipy.optimize import minimize
from scipy.optimize import Bounds


pybullet build time: Nov 28 2023 23:45:17


In [2]:
# Execute init.py with -i, i.e. inside this notebook's interactive namespace, so the
# names it defines become notebook globals.
# NOTE(review): later cells use pin_model, pin_data, robot_simulator, q0, v0, x0, nq,
# mj_model and mj_data without defining them — presumably they come from init.py
# (or from the star imports above); confirm before running on a fresh kernel.
%run -i 'init.py'

In [3]:
# Obstacles for the scene, one row per obstacle: (center position, l, col_r).
# The values are forwarded unchanged to obstacle_set.add_obs.
obs_set = obstacle_set()
obstacle_specs = (
    ([0.65, -0.05, 0.7], 0.05, 0.15),
    ([0.65, -0.15, 0.45], 0.1, 0.15),
    ([0.65, -0.25, 0.7], 0.1, 0.15),
)
for obs_center, obs_l, obs_col_r in obstacle_specs:
    obs_set.add_obs(np.array(obs_center), l=obs_l, col_r=obs_col_r)

# Number of obstacles as reported by the set itself.
obs_num = obs_set.obs_num

In [4]:
# endeff frame translation goal
# Index of the frame named "contact" in the Pinocchio model.
endeff_frame_id = pin_model.getFrameId("contact")
# NOTE(review): "contact" is looked up here as a *joint* name as well — confirm that a
# joint with this name exists. endeff_joint_id is not read in any visible cell.
endeff_joint_id = pin_model.getJointId("contact")
# Alternative (disabled): take the goal from the current placement of the frame.
# endeff_translation = pin_data.oMf[endeff_frame_id].translation.copy()
# Hard-coded 3-vector goal; it is passed to init_robot and init_mujoco in later cells.
endeff_translation = np.array([0.4,  -0.5,  0.35])

In [5]:
# Cost weights, keyed by cost name.
#   w_run / w_term         : reference weights; used to solve for the demonstration
#                            and passed to the IRL arguments as des_run / des_term.
#   w_run_bad / w_term_bad : initial weights handed to the IRL solver.
w_run = {
    'translation': 0.1,
    'xReg': 0.1,
    'uReg': 0.0001,
}

w_term = {
    'translation': 10,
    'xReg': 0.1,
}

# Alternative initial guess (disabled):
# w_run_bad = {
#     'translation': .0,
#     'xReg': 0.1,
#     'uReg': 0.0001
# }
#
# w_term_bad = {
#     'translation': 0.0,
#     'xReg': 0.1
# }

w_run_bad = {
    'xReg': 0.0,
    'uReg': 0.01,
    'translation': 0.0,
}

w_term_bad = {
    'xReg': 0.01,
    'translation': 0.01,
}

# One collision cost per obstacle: weighted 1000 in the reference weights and
# switched off (0) in the initial guess. The loop index is used directly instead
# of a hand-maintained counter.
for c in range(obs_set.obs_num):
    collision_key = f'collision{c}'
    w_run[collision_key] = 1000
    w_term[collision_key] = 1000
    w_run_bad[collision_key] = 0
    w_term_bad[collision_key] = 0

# Re-create each dictionary with its keys in sorted order. A later cell reads the
# values by position (list(w.values())[i]) next to the IRL key lists, so a
# consistent ordering matters.
# NOTE(review): this assumes normalize_w keeps the key order — confirm.
w_run = dict(sorted(w_run.items()))
w_term = dict(sorted(w_term.items()))
w_run_bad = dict(sorted(w_run_bad.items()))
w_term_bad = dict(sorted(w_term_bad.items()))

# normalize_w is defined outside this notebook; it takes a (running, terminal) pair
# and returns the pair that replaces it.
w_run, w_term = normalize_w(w_run, w_term)
w_run_bad, w_term_bad = normalize_w(w_run_bad, w_term_bad)

In [6]:
# Problem discretisation: dt is forwarded to init_robot, the IRL arguments and
# run_traj; T sizes the initial guess below (T + 1 states).
dt = 1e-2
T = 150

solver, runningDatas, terminalData = init_robot(
    robot_simulator, q0, v0, obs_set, endeff_translation, w_run, w_term, dt, T
)

# Cost names of the first running node and of the terminal node, and their counts.
Keys_run = solver.problem.runningDatas[0].differential.costs.costs.todict().keys()
Keys_term = solver.problem.terminalData.differential.costs.costs.todict().keys()
nr_run = len(Keys_run)
nr_term = len(Keys_term)
nr = nr_run + nr_term

# Initial guess: the same x0 repeated for every node, with the controls returned by
# the problem's quasiStatic() for all states but the last.
xs_init = [x0] * (T + 1)
us_init = solver.problem.quasiStatic(xs_init[:-1])

  m.frames[pin_model.getFrameId("universe")].parent,
  obsObj = pin.GeometryObject("obstacle"+str(num_obs),


In [7]:
# Solve the optimal-control problem with the reference weights (w_run, w_term).
# The resulting trajectory (xs_opt, us_opt) is handed to IRL_Crocoddyl in a later cell.
update_solver_weights(solver, T, w_run, w_term)
solver.termination_tolerance = 1e-4
solver.with_callbacks = False
# The third argument (500) is presumably the maximum number of solver iterations — TODO confirm.
# NOTE(review): the return value of solve() is not checked, so a failed solve would go unnoticed.
solver.solve(xs_init, us_init, 500)
# Snapshot the solution so it does not refer to the solver's own xs/us containers.
# NOTE(review): .copy() here is presumably shallow (per-node arrays may still be shared) — confirm.
xs_opt = solver.xs.copy()
us_opt = solver.us.copy()

In [8]:
## IRL Args
# Options consumed by IRL_Crocoddyl. Option semantics live in IRL_solver; the notes
# below only restate what this notebook shows (e.g. the labels printed by print_info).
irl_args = {
    # Solver instance and initial weights for the IRL iterations.
    'solver': solver,
    'w_run': w_run_bad,
    'w_term': w_term_bad,

    # Algorithm selection (print_info labels: "Type", "SQP Iterations").
    'type': 'autoreg',
    'irl_iter': 1,
    'sqp_iter': 1,
    'next_traj': 'optimal',
    'lr': 1.0,

    # Sample time (print_info label: "Sample Time").
    'dt': dt,

    # Line search settings.
    'line_search_steps': 15,
    'line_search_base': 'none',

    'use_bad': False,
    'normalize': False,

    # print_info labels: "Set Size" and "Sample Size".
    'K_set': 5,
    'N_samples': T,

    'use_best': False,
    'Lambda': 0.01,
    'KL_tol': -10.0,

    # Iteration limits (print_info label: "IRL Max Iteration").
    'max_iter': 300,
    'min_iter': 15,

    # Reference weights supplied for comparison against the learned ones.
    'compare_desired': True,
    'des_run': w_run,
    'des_term': w_term,

    'verbose': True,
}

In [9]:
# Discard any IRL object left in the notebook namespace by an earlier run of this
# cell, then build a fresh one from the reference trajectory and the options above.
globals().pop('IRL', None)
IRL = IRL_Crocoddyl(xs_opt, us_opt, irl_args)
IRL.print_info()

IRL Parameters:
Initial Running Weight:  {'collision0': 0.0, 'collision1': 0.0, 'collision2': 0.0, 'translation': 0.0, 'uReg': 1.0, 'xReg': 0.0}
Initial Terminal Weight:  {'collision0': 0.0, 'collision1': 0.0, 'collision2': 0.0, 'translation': 1.0, 'xReg': 1.0}
Type:  autoreg
Set Size:  5
Sample Size:  150
Lambda:  0.01
SQP Iterations:  1
IRL Max Iteration:  300
Sample Time:  0.01


In [10]:
# Run the IRL optimisation and measure its wall-clock duration.
begin_time = time.time()
IRL.solve_irl()
end_time = time.time()

# First and second entries of the last / best trajectory records
# (presumably states and controls, matching the xs/us naming used for playback).
last_x = IRL.last_traj[0]
last_u = IRL.last_traj[1]
best_x = IRL.best_traj[0]
best_u = IRL.best_traj[1]

print('Duration: ', end_time - begin_time)

-- iter -- KL_div ---- KL_des ---- Opt Div --- Cost Diff --- Fcn Val ---- Step ---
||   0  || 0.000025 || 0.000000 || 1.000000 || 0.068061 || 103.986167 || 1.000
||   1  || 0.000000 || 0.000000 || 1.000000 || 0.988208 || 164.805441 || 1.000
||   2  || 0.000000 || 0.000000 || 1.000000 || 3.306626 || 207.957087 || 1.000
||   3  || 0.000000 || 0.000000 || 1.000000 || 7.275779 || 241.427864 || 1.000
||   4  || 0.000000 || 0.000000 || 1.000000 || 13.048266 || 268.775288 || 1.000
||   5  || 0.000000 || 0.000000 || 1.000000 || 20.487625 || 268.774567 || 1.000
||   6  || 0.000000 || 0.000000 || 1.000000 || 29.595646 || 268.773851 || 1.000
||   7  || 0.000000 || 0.000000 || 1.000000 || 40.379783 || 268.773140 || 1.000
||   8  || 0.000000 || 0.000000 || 1.000000 || 52.836860 || 268.772434 || 1.000
||   9  || 0.000000 || 0.000000 || 1.000000 || 66.946016 || 268.771734 || 1.000
||  10  || 0.000000 || 0.000000 || 1.000000 || 82.719795 || 268.771040 || 1.000
||  11  || 0.000000 || 0.000000 || 1.0000

KeyboardInterrupt: 

In [None]:
def _print_weight_comparison(keys, desired, initial, chosen):
    """Print the desired, initial and chosen weight for every cost name in ``keys``.

    All three weight containers are read by position, in the iteration order of
    ``keys``, so they must list their entries in that same order.

    Args:
        keys: iterable of cost names; defines the rows and their order.
        desired: dict of reference weights (read through ``values()``).
        initial: dict of initial weights (read through ``values()``).
        chosen: indexable sequence of weights to report.
    """
    # Materialise the dict values once instead of rebuilding the lists on every row.
    desired_values = list(desired.values())
    initial_values = list(initial.values())
    for i, key in enumerate(keys):
        print(key, ':')
        print('Desired: {:.5f} || Initial: {:.5f} || Chosen: {:.5f}'.format(
            desired_values[i], initial_values[i], chosen[i]))


# Entry of IRL.ws to report; -1 selects the last stored weight pair.
# To report IRL.best_w instead, pass IRL.best_w[0] / IRL.best_w[1] as ``chosen``.
addr = -1
w_chosen_run = IRL.ws[addr][0]
w_chosen_term = IRL.ws[addr][1]

print('Running')
_print_weight_comparison(IRL.keys_run, w_run, w_run_bad, w_chosen_run)
print()
print('Terminal')
_print_weight_comparison(IRL.keys_term, w_term, w_term_bad, w_chosen_term)


In [11]:
# Replay one of the trajectories stored by the IRL run in the MuJoCo viewer.
# n = addr
# Index into IRL.Xs / IRL.Us; -1 selects the last stored entry.
n = -1
xs = IRL.Xs[n]
us = IRL.Us[n]
# Open the viewer with configuration q0, a zero vector of length nq, the obstacles and the goal.
viewer = init_mujoco(mj_model, mj_data, q0, np.zeros(nq), obs_set, endeff_translation)
# Wait 4 s before playback — presumably to give the viewer window time to appear; TODO confirm.
time.sleep(4)
# Alternative (disabled): replay every stored iteration in sequence.
# for i in range(IRL.max_iter):
#     run_traj(viewer, mj_model, mj_data, pin_model, pin_data, IRL.Xs[i], IRL.Us[i], dt)
run_traj(viewer, mj_model, mj_data, pin_model, pin_data, xs, us, dt)
# Alternatives (disabled): replay the reference, best or last trajectory instead.
# run_traj(viewer, mj_model, mj_data, pin_model, pin_data, xs_opt, us_opt, dt)
# run_traj(viewer, mj_model, mj_data, pin_model, pin_data, best_x, best_u, dt)
# run_traj(viewer, mj_model, mj_data, pin_model, pin_data, last_x, last_u, dt)

In [None]:
# Display how many entries IRL.Xs currently holds.
len(IRL.Xs)