In [1]:
import torch
from torch.autograd import Function, Variable
import torch.nn.functional as F
from torch import nn
from torch.nn.parameter import Parameter
import torch.optim as optim
from torch.nn.utils import parameters_to_vector
from torch.utils.data import TensorDataset, DataLoader

import numpy as np
import numpy.random as npr

import matplotlib
matplotlib.use('TkAgg')
from matplotlib import pyplot as plt

# Leave the two lines below commented out if the `mpc` package is in the current directory.
# Otherwise uncomment them and change the path to the directory that contains it.
#import sys
#sys.path.append('./../mpc.pytorch-master/')


from mpc import mpc
from mpc import casadi_control
from mpc.mpc import GradMethods, QuadCost, LinDx
#from mpc.dynamics import NNDynamics
#import mpc.util as eutil
from mpc.env_dx import frenet_kin_bicycle
from mpc.track.src import simple_track_generator, track_functions



#import sys
#from IPython.core import ultratb
#sys.excepthook = ultratb.FormattedTB(mode='Verbose',
#     color_scheme='Linux', call_pdb=1)

import time
import os
import shutil
import pickle as pkl
import collections

import argparse

In [2]:
# Settings for the track used throughout the notebook.
track_name = 'DEMO_TRACK'
track_density = 300
track_width = 0.5

# Upper bound on the velocity state (used in the soft velocity constraint).
v_max = 2

# Generator object that the selected track function populates later on.
gen = simple_track_generator.trackGenerator(track_density, track_width)

In [3]:
# Arguments handed unchanged to the selected track function.
t = 0.3
init = [0,0,0]

# Lookup table from track name to the function that builds that track.
available_tracks = {
    'DEMO_TRACK'    : track_functions.demo_track,
    'HARD_TRACK'    : track_functions.hard_track,
    'LONG_TRACK'    : track_functions.long_track,
    'LUCERNE_TRACK' : track_functions.lucerne_track,
    'BERN_TRACK'    : track_functions.bern_track,
    'INFINITY_TRACK': track_functions.infinity_track,
    'SNAIL_TRACK'   : track_functions.snail_track
}

# An unknown `track_name` silently falls back to the demo track.
track_function = available_tracks.get(track_name, track_functions.demo_track)

track_function(gen, t, init)

gen.populatePointsAndArcLength()
gen.centerTrack()

# Track table with one row per quantity:
# 0: x, 1: y, 2: arc length, 3: tangent angle, 4: curvature.
track_rows = np.vstack([
    gen.xCoords,
    gen.yCoords,
    gen.arcLength,
    gen.tangentAngle,
    gen.curvature,
])
track_coord = torch.from_numpy(track_rows)

In [4]:
# Device used for the tensors that are explicitly moved with `.to(device)`.
device = 'cpu'

# Softplus with beta=10, used to build the soft-constraint states in `sample_xinit`.
softplus_op = nn.Softplus(beta=10)

def sample_xinit(n_batch):
    """Draw `n_batch` random initial states.

    Returns a tensor with one row per sample and the columns
    (sigma, d, phi, v, sigma_0, sigma_diff, d_lb, d_ub, v_lb, v_ub).
    sigma, d, phi and v are sampled uniformly; sigma_0 equals sigma, so
    sigma_diff is zero; the last four columns are softplus values of the
    lateral-offset and velocity bound violations.

    Relies on the notebook-level `softplus_op`, `track_width` and `v_max`.
    """
    def draw(low, high):
        # One uniform sample in [low, high) per batch element.
        return torch.rand(n_batch)*(high-low)+low

    # The draw order is kept fixed so the random stream is consumed identically.
    sigma = draw(0.01, 2.)
    d = draw(-0.1, 0.1)
    phi = draw(-0.40*np.pi, 0.40*np.pi)
    v = draw(0., 0.2)

    # Reference arc length starts at the sampled one, so the difference is zero.
    sigma_0 = sigma
    sigma_diff = sigma-sigma_0

    # Soft penalties for leaving the track on either side.
    half_width = 0.5*track_width
    d_lb = softplus_op(-d-half_width)
    d_ub = softplus_op(d-half_width)

    # Soft penalties for a velocity below 0 or above v_max.
    v_lb = softplus_op(-v + 0)
    v_ub = softplus_op(v-v_max)

    columns = [sigma, d, phi, v, sigma_0, sigma_diff, d_lb, d_ub, v_lb, v_ub]
    return torch.stack(columns, dim=1)

# Dynamics model built from the track table.
true_dx = frenet_kin_bicycle.FrenetKinBicycleDx(track_coord)

# MPC horizon length and number of initial states per training batch.
mpc_T = 15
n_batch = 8

# Bounds on the two control inputs, tiled to shape (mpc_T, n_batch, 2).
# They are created directly on `device` so they live on the same device as
# the cost tensors and initial states that are moved with `.to(device)`.
u_lower = torch.tensor([-2., -1.], device=device).repeat(mpc_T, n_batch, 1)
u_upper = torch.tensor([2., 1.], device=device).repeat(mpc_T, n_batch, 1)

n_state = true_dx.n_state
print(n_state)
n_ctrl = true_dx.n_ctrl

# Solver settings shared by the MPC calls below.
u_init = None
eps = 1
lqr_iter = 500
grad_method = GradMethods.AUTO_DIFF

10
10


In [5]:
# casadi mpc with exact penalty
test_q = np.array([ 0.,  6.,  1.,  0., 0., 0., 0., 0., 1., 2.])
test_p = np.array([ -2.,  0.,  0.,  0., 100., 100., 100., 100., -1,  0.])

control = casadi_control.CasadiControl(track_coord)

# A single random initial state, converted to numpy for the casadi solver.
x0 = sample_xinit(1).numpy()
print(x0[0])

# Problem dimensions passed to the solver.
dx = n_state   # number of states
du = n_ctrl    # number of control inputs
dc = 4         # number of constraints
df = 2         # states casadi does not really need (sigma_0 and sigma_diff)
horizon = mpc_T
N = horizon

sol = control.mpc_casadi(test_q,test_p,x0,horizon,df,dc,dx,du,track_width,v_max)

# The solution vector holds the states first and the du*N inputs at the end.
n_input_entries = du*N
x = sol[:-n_input_entries]
u = sol[-n_input_entries:]
u_r = u.reshape(N,du)
x_r = x.reshape(N+1,dx-df-dc)
print(u_r)
print(x_r)

[ 6.6548526e-01  4.7545321e-02 -9.6970081e-02  1.9080684e-01
  6.6548526e-01  0.0000000e+00  4.9765217e-03  1.2403339e-02
  1.3834067e-02  1.3892311e-09]
curv start
curv end
curv start
curv end
curv start
curv end
solve optimization problem

******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************

This is Ipopt version 3.14.11, running with linear solver MUMPS 5.4.1.

Number of nonzeros in equality constraint Jacobian...:      304
Number of nonzeros in inequality constraint Jacobian.:       30
Number of nonzeros in Lagrangian Hessian.............:      240

Total number of variables............................:       94
                     variables with only lo

In [6]:
# casadi mpc with state constraints.
# NOTE: the cost parameter vectors have a different length here than in the
# exact-penalty variant.
test_q = np.array([ 0.,  6.,  1.,  0., 1., 2.])
test_p = np.array([ -2.,  0.,  0.,  0.,  -1,  0.])

sol = control.mpc_casadi_with_constraints(test_q,test_p,x0,horizon,df,dc,dx,du,track_width,v_max)

# Split the solution vector: states first, the du*N inputs at the end.
N = horizon
n_input_entries = du*N
x = sol[:-n_input_entries]
u = sol[-n_input_entries:]
u_r = u.reshape(N,du)
x_r = x.reshape(N+1,dx-df-dc)
print(u_r)
print(x_r)

curv start
curv end
curv start
curv end
curv start
curv end
This is Ipopt version 3.14.11, running with linear solver MUMPS 5.4.1.

Number of nonzeros in equality constraint Jacobian...:      304
Number of nonzeros in inequality constraint Jacobian.:       62
Number of nonzeros in Lagrangian Hessian.............:      225

Total number of variables............................:       94
                     variables with only lower bounds:        0
                variables with lower and upper bounds:        0
                     variables with only upper bounds:        0
Total number of equality constraints.................:       64
Total number of inequality constraints...............:       62
        inequality constraints with only lower bounds:        0
   inequality constraints with lower and upper bounds:       62
        inequality constraints with only upper bounds:        0

iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
   0  1.9964558e+

In [7]:
# Each MPC call returns a prediction of the states and the inputs over the horizon for all batches and all dimensions.
# They are stored in the order pred_x = [horizon, batch, dimension].
# To penalize control behaviour that does not make enough progress within a horizon, we can choose
# our loss as -pred_x[mpc_T-1,:,0].

def get_loss_progress(x_init, dx, _Q, _p, mpc_T=mpc_T):
    """Return the negated mean progress of the MPC prediction over the horizon.

    Args:
        x_init: batch of initial states handed to the MPC solver.
        dx: dynamics object; its `n_state`, `n_ctrl`, `linesearch_decay`
            and `max_linesearch_iter` attributes are read here.
        _Q, _p: quadratic and linear cost terms wrapped in a `QuadCost`.
        mpc_T: horizon length; defaults to the notebook-level `mpc_T`
            at definition time.

    Returns:
        Scalar tensor: mean over the batch of (first predicted value of
        state 0) minus (last predicted value of state 0), so that more
        progress gives a lower loss.

    NOTE(review): `u_lower`, `u_upper` and `n_batch` are notebook globals
    sized for the default horizon and batch, so other sizes presumably
    need matching bounds -- confirm before changing `mpc_T`.
    NOTE(review): `eps=5` is hard-coded; the notebook-level `eps` is not
    used here.
    """
    controller = mpc.MPC(
        dx.n_state, dx.n_ctrl, mpc_T,
        u_lower=u_lower, u_upper=u_upper, u_init=u_init,
        lqr_iter=lqr_iter,
        verbose=0,
        exit_unconverged=False,
        detach_unconverged=True,
        linesearch_decay=dx.linesearch_decay,
        max_linesearch_iter=dx.max_linesearch_iter,
        grad_method=grad_method,
        eps=5,
        n_batch=n_batch,
    )
    pred_x, pred_u, pred_objs = controller(x_init, QuadCost(_Q, _p), dx)

    # Subtracting the first step normalizes the progress by where each
    # sample started.
    last_step = pred_x[mpc_T-1,:,0]
    first_step = pred_x[0,:,0]
    return torch.mean(-last_step + first_step)

In [8]:
# Learnable cost parameters.
# They are created directly on `device` rather than created on the CPU and
# moved with `.to(device)`: on a non-CPU device `.to()` returns a new non-leaf
# tensor, which the optimizer rejects and which never receives a `.grad`.
# `n_state-7` is the number of entries placed between the sigma and sigma_0
# slots when the full cost vector is assembled in the training loop.
learn_q_logit_state = torch.ones(n_state-7, requires_grad=True, device=device)
learn_q_logit_sigma_diff = torch.ones(1, requires_grad=True, device=device)
learn_q_logit_input = torch.ones(n_ctrl, requires_grad=True, device=device)
learn_p_state = torch.zeros(n_state-7, requires_grad=True, device=device)
learn_p_sigma_diff = torch.zeros(1, requires_grad=True, device=device)
learn_p_input = torch.zeros(n_ctrl, requires_grad=True, device=device)

# Tensors handed to the optimizer.
params1 = [
    learn_q_logit_state,
    learn_q_logit_sigma_diff,
    learn_q_logit_input,
    learn_p_state,
    learn_p_sigma_diff,
    learn_p_input,
]
env_params = true_dx.params

# Fixed (not learned) cost entries for the penalty states, sigma and sigma_0.
q_penalty = torch.zeros(4, device=device)
p_penalty = 10.0*torch.ones(4, device=device)
q_sigma = torch.zeros(1, device=device)
p_sigma = torch.zeros(1, device=device)
q_sigma_0 = torch.zeros(1, device=device)
p_sigma_0 = torch.zeros(1, device=device)
track_coord = track_coord.to(device)

In [9]:
#learn_q_logit = torch.ones_like(true_q, requires_grad=True).to(device)
#learn_p = torch.zeros_like(true_p, requires_grad=True).to(device)

In [10]:
# Single RMSprop parameter group holding all learnable cost parameters.
rmsprop_group = {
    'params': params1,
    'lr': 2e-3,
    'alpha': 0.99,
}
params = [rmsprop_group]

# Fresh dynamics object of the same class as `true_dx`.
dx = type(true_dx)(track_coord, env_params)

opt = optim.RMSprop(params)

10


In [11]:
for i in range(150):

    # Assemble the full cost vectors from the fixed and the learnable pieces,
    # in the order (sigma, learned states, sigma_0, sigma_diff, penalties, inputs).
    q = torch.cat([
        q_sigma, learn_q_logit_state, q_sigma_0,
        learn_q_logit_sigma_diff, q_penalty, learn_q_logit_input,
    ])
    p = torch.cat([
        p_sigma, learn_p_state, p_sigma_0,
        learn_p_sigma_diff, p_penalty, learn_p_input,
    ])
    print(q)

    # Tile the cost over the horizon and the batch.
    Q_batch = torch.diag(q).repeat(mpc_T, n_batch, 1, 1)
    p_batch = p.repeat(mpc_T, n_batch, 1)

    # Fresh random initial states for every step.
    x_init = sample_xinit(n_batch).to(device)
    #im_loss = get_loss_cost(x_init, dx, Q_batch, p_batch)
    im_loss = get_loss_progress(x_init, dx, Q_batch, p_batch)

    opt.zero_grad()
    im_loss.backward()
    opt.step()

    # The loss is the negated progress, so flip the sign for reporting.
    print('Batch:', i , ' Progression with MPC_T=',mpc_T ,': ', -round(im_loss.item(), 4))

tensor([0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 1., 1.],
       grad_fn=<CatBackward0>)


  result = Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
LU, pivots = torch.lu(A, compute_pivots)
should be replaced with
LU, pivots = torch.linalg.lu_factor(A, compute_pivots)
and
LU, pivots, info = torch.lu(A, compute_pivots, get_infos=True)
should be replaced with
LU, pivots, info = torch.linalg.lu_factor_ex(A, compute_pivots) (Triggered internally at ../aten/src/ATen/native/BatchLinearAlgebra.cpp:1992.)
  LU, pivots, infos = torch._lu_with_info(
Note that torch.linalg.lu_solve has its arguments reversed.
X = torch.lu_solve(B, LU, pivots)
should be replaced with
X = torch.linalg.lu_solve(LU, pivots, B) (Triggered internally at ../aten/src/ATen/native/BatchLinearAlgebra.cpp:2149.)
  x_init = -q.unsqueeze(2).lu_solve(*H_lu).squeeze(2) # Clamped in the x assignment.


Batch: 0  Progression with MPC_T= 15 :  0.0551
tensor([0.0000, 1.0200, 0.9800, 0.9800, 0.0000, 0.9800, 0.0000, 0.0000, 0.0000,
        0.0000, 0.9800, 0.9800], grad_fn=<CatBackward0>)
Batch: 1  Progression with MPC_T= 15 :  0.0741
tensor([0.0000, 1.0254, 0.9998, 0.9635, 0.0000, 0.9637, 0.0000, 0.0000, 0.0000,
        0.0000, 0.9633, 0.9721], grad_fn=<CatBackward0>)
Batch: 2  Progression with MPC_T= 15 :  0.1028
tensor([0.0000, 1.0244, 1.0091, 0.9481, 0.0000, 0.9497, 0.0000, 0.0000, 0.0000,
        0.0000, 0.9457, 0.9540], grad_fn=<CatBackward0>)
Batch: 3  Progression with MPC_T= 15 :  0.0739
tensor([0.0000, 1.0163, 1.0232, 0.9385, 0.0000, 0.9413, 0.0000, 0.0000, 0.0000,
        0.0000, 0.9364, 0.9371], grad_fn=<CatBackward0>)
Batch: 4  Progression with MPC_T= 15 :  0.1282
tensor([0.0000, 1.0118, 1.0287, 0.9256, 0.0000, 0.9281, 0.0000, 0.0000, 0.0000,
        0.0000, 0.9222, 0.9279], grad_fn=<CatBackward0>)
Batch: 5  Progression with MPC_T= 15 :  0.0551
tensor([0.0000, 1.0304, 1.0397, 0

Batch: 45  Progression with MPC_T= 15 :  0.1775
tensor([0.0000, 0.9999, 1.2188, 0.7027, 0.0000, 0.7007, 0.0000, 0.0000, 0.0000,
        0.0000, 0.7002, 0.6769], grad_fn=<CatBackward0>)
Batch: 46  Progression with MPC_T= 15 :  0.181
tensor([0.0000, 0.9945, 1.2227, 0.6978, 0.0000, 0.6951, 0.0000, 0.0000, 0.0000,
        0.0000, 0.6948, 0.6741], grad_fn=<CatBackward0>)
Batch: 47  Progression with MPC_T= 15 :  0.1997
tensor([0.0000, 0.9903, 1.2295, 0.6924, 0.0000, 0.6892, 0.0000, 0.0000, 0.0000,
        0.0000, 0.6890, 0.6705], grad_fn=<CatBackward0>)
Batch: 48  Progression with MPC_T= 15 :  0.1559
tensor([0.0000, 0.9924, 1.2312, 0.6882, 0.0000, 0.6852, 0.0000, 0.0000, 0.0000,
        0.0000, 0.6847, 0.6680], grad_fn=<CatBackward0>)
Batch: 49  Progression with MPC_T= 15 :  0.1708
tensor([0.0000, 0.9943, 1.2344, 0.6837, 0.0000, 0.6809, 0.0000, 0.0000, 0.0000,
        0.0000, 0.6797, 0.6628], grad_fn=<CatBackward0>)
Batch: 50  Progression with MPC_T= 15 :  0.2098
tensor([0.0000, 0.9959, 1.23

KeyboardInterrupt: 

In [12]:
# Number of test samples, i.e. the number of initial states to roll out.
N_test = 1

# Horizon used at test time; it may be longer than the training horizon.
# CAUTION: it is not yet confirmed that the horizon can be changed between
# training and testing -- doing so sometimes gives weird results.
# Ideally this would cover a whole lap, but the solver warnings/errors need
# to be fixed first.
mpc_T_test = 30

In [13]:
# Random initial state(s) for the test rollout, moved to `device` like the
# initial states used in the training loop.
x_init_test = sample_xinit(N_test).to(device)

In [14]:
# Below you can put any initial state you want (any that make sense)
#x_init_test = torch.tensor([[1.6, 0.1, -0.8, 0.1,1.6,0,softplus_op(torch.Tensor([-0.1+0.0])), softplus_op(torch.Tensor([-0.1-0.5*track_width])),softplus_op(torch.Tensor([0.1-0.5*track_width])), softplus_op(torch.Tensor([-0.1-0.5*track_width]))]])

In [15]:
# Tile the cost vectors left over from the training loop across the test
# horizon and the test batch.
Q_test = torch.diag(q).repeat(mpc_T_test, N_test, 1, 1)
p_test = p.repeat(mpc_T_test, N_test, 1)

In [16]:
# NOTE(review): the values returned by get_true_obj() are overwritten right
# below; the call is kept in case it has side effects -- confirm and remove.
true_q, true_p = true_dx.get_true_obj()

# Hand-written expert cost, placed on `device`.
# NOTE(review): these vectors have 8 entries while the learned cost vectors
# have n_state + n_ctrl entries -- confirm the intended layout before use.
true_q = torch.Tensor([ 0.,  3.,  1.,  0., 0., 0., 1., 2.]).to(device)
true_p = torch.Tensor([ -2.,  0.,  0.,  0., 100., 100.,  -1,  0.]).to(device)

# Tile across the test horizon and the test batch.
expert_Q = torch.diag(true_q).repeat(mpc_T_test, N_test, 1, 1)
expert_p = true_p.repeat(mpc_T_test, N_test, 1)

In [17]:
# Added here the bounds of U
u_lower_test = torch.tensor([-2., -1.]).unsqueeze(0).unsqueeze(0).repeat(mpc_T_test, N_test, 1)
u_upper_test = torch.tensor([2., 1.]).unsqueeze(0).unsqueeze(0).repeat(mpc_T_test, N_test, 1)

In [18]:
# Build the test-time controller with the test horizon, batch size and bounds.
mpc_test_controller = mpc.MPC(
    n_state, n_ctrl, mpc_T_test,
    u_lower=u_lower_test, u_upper=u_upper_test, u_init=u_init,
    lqr_iter=lqr_iter,
    verbose=0,
    exit_unconverged=False,
    detach_unconverged=True,
    linesearch_decay=dx.linesearch_decay,
    max_linesearch_iter=dx.max_linesearch_iter,
    grad_method=grad_method,
    eps=2,
    n_batch=N_test,
)

# Solve from the test initial state with the test cost.
x_mpc_test, u_mpc_test, objs_mpc_test = mpc_test_controller(
    x_init_test, QuadCost(Q_test, p_test), dx
)

In [19]:
def frenet_to_cartesian(point_f, ref_path):
    """Convert a Frenet point to Cartesian coordinates.

    Args:
        point_f: indexable with the arc length at index 0 and the lateral
            offset at index 1.
        ref_path: 2-D tensor whose rows 0..3 hold x, y, arc length and
            tangent angle of the reference path samples.

    Returns:
        A new tensor `[x, y]`. The reference sample closest in arc length
        is used as the base point (no interpolation), and the lateral
        offset is applied perpendicular to the tangent at that sample.
    """
    arc_length = point_f[0]
    lateral_offset = point_f[1]

    # Index of the reference sample whose arc length is closest to the point.
    nearest_index = ((arc_length - ref_path[2,:])**2).argmin()
    tangent_angle = ref_path[3,nearest_index]

    # Shift the base point sideways along the path normal.
    x = ref_path[0,nearest_index] - lateral_offset*torch.sin(tangent_angle)
    y = ref_path[1,nearest_index] + lateral_offset*torch.cos(tangent_angle)

    return torch.tensor([x, y])

In [20]:
# Convert the predicted (arc length, lateral offset) pair of the first batch
# element to Cartesian coordinates for every step of the test horizon.
xy_points = [
    frenet_to_cartesian(x_mpc_test[step,0,:2], track_coord)
    for step in range(mpc_T_test)
]

x_list = [point[0].numpy() for point in xy_points]
y_list = [point[1].numpy() for point in xy_points]

In [21]:
# Pack the per-step coordinates into arrays for plotting.
x_plot, y_plot = np.array(x_list), np.array(y_list)

In [22]:
# Largest value of each control input over the horizon and the batch.
u_mpc_test.max(dim=0).values.max(dim=0).values

tensor([0.7880, 0.3495], grad_fn=<MaxBackward0>)

In [23]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10,5), dpi=120)

# Draw the track, then overlay the predicted trajectory in red.
gen.plotPoints(ax)
#gen.pointAtArcLength(0)
#gen.writePointsToYaml('../tracks/' + track_name + '.yaml', track_density)
ax.scatter(x_plot, y_plot, s=4, color='red')

# Report where the track starts and the printed arc-length figure.
print(f'x_init: {gen.xCoords[0]!s}')
print(f'y_init: {gen.yCoords[0]!s}')
print(f'yaw_init: {gen.tangentAngle[0]!s}')
print(f'Total Arc Length: {gen.arcLength[-1]/2!s}')
plt.show()

x_init: 0.15000000000000002
y_init: -1.0499999999999994
yaw_init: 0.0
Total Arc Length: 11.568244517641709
