Sequential Quadratic Programming

Dependencies: `jax`, `scipy`, `numpy`

In [1]:
import jax.numpy as jnp
from jax import jacfwd, jacrev

from scipy.optimize import minimize

Define your own objective function and constraints here:

(We use the Rosenbrock function here as an example.)

$f(x)=\sum_{i=1}^{N-1}\left[100\,(x_{i+1}-x_i^2)^2+(1-x_i)^2\right]$

In [28]:
# The Rosenbrock function (example objective).
def f(x):
    """Return the Rosenbrock value of the 1-D array ``x``.

    Each consecutive pair contributes
    ``100*(x[i+1] - x[i]**2)**2 + (1 - x[i])**2``; the terms are added up.
    """
    head, tail = x[:-1], x[1:]
    return sum(100.0*(tail-head**2.0)**2.0 + (1-head)**2.0)

In [3]:
import jax.numpy as jnp
from jax import grad, jit, vmap
from jax import random

# Root PRNG key for the random draws in this notebook (fixed seed 0).
key = random.PRNGKey(seed=0)



In [4]:
from jax import jacfwd, jacrev

def sigmoid(x):
    """Logistic sigmoid of ``x``, evaluated through ``tanh``."""
    half_x = x / 2
    return 0.5 * (jnp.tanh(half_x) + 1)

# Probability of each label being true.
def predict(W, b, inputs):
    """Apply ``sigmoid`` to the affine score ``inputs @ W + b``."""
    scores = jnp.dot(inputs, W) + b
    return sigmoid(scores)

# Toy dataset: four samples with three features each, plus one boolean label
# per sample.
inputs = jnp.array([
    [0.52,  1.12,  0.77],
    [0.88, -1.08,  0.15],
    [0.52,  0.06, -1.30],
    [0.74, -2.49,  1.39],
])
targets = jnp.array([True, True, False, True])

# Negative log-likelihood of the training examples.
def loss(W, b):
    """Return minus the summed log-probability assigned to the true labels.

    Reads the module-level ``inputs`` and ``targets``.
    """
    preds = predict(W, b, inputs)
    prob_when_true = preds * targets
    prob_when_false = (1 - preds) * (1 - targets)
    return -jnp.sum(jnp.log(prob_when_true + prob_when_false))

# Draw random model coefficients: split the key three ways, then sample a
# length-3 weight vector and a scalar bias from a standard normal.
key, W_key, b_key = random.split(key, num=3)
W = random.normal(W_key, shape=(3,))
b = random.normal(b_key, shape=())


# Freeze `b` and `inputs` so the predictions are a function of the weights only.
def f(W):
    return predict(W, b, inputs)

# Forward-mode Jacobian of the predictions with respect to W.
J = jacfwd(f)(W)
print("jacfwd result, with shape", J.shape)
print(J)

# The same Jacobian obtained with reverse mode.
J = jacrev(f)(W)
print("jacrev result, with shape", J.shape)
print(J)

def hessian(f):
    """Return a function computing the Hessian of ``f`` (forward-over-reverse)."""
    reverse_jacobian = jacrev(f)
    return jacfwd(reverse_jacobian)

# Second derivatives of the predictions with respect to W
# (forward-over-reverse), followed by the weights they were evaluated at.
H = jacfwd(jacrev(f))(W)
print("hessian, with shape", H.shape)
print(H)

print("W: ",W)

jacfwd result, with shape (4, 3)
[[ 0.05981758  0.12883787  0.08857603]
 [ 0.04015916 -0.04928625  0.00684531]
 [ 0.12188288  0.01406341 -0.3047072 ]
 [ 0.00140431 -0.00472531  0.00263782]]
jacrev result, with shape (4, 3)
[[ 0.05981757  0.12883787  0.08857603]
 [ 0.04015916 -0.04928625  0.00684531]
 [ 0.12188289  0.01406341 -0.3047072 ]
 [ 0.00140431 -0.00472531  0.00263782]]
hessian, with shape (4, 3, 3)
[[[ 0.02285465  0.04922541  0.03384247]
  [ 0.04922541  0.10602397  0.07289147]
  [ 0.03384247  0.07289147  0.05011288]]

 [[-0.03195215  0.03921401 -0.00544639]
  [ 0.03921401 -0.04812629  0.00668421]
  [-0.00544639  0.00668421 -0.00092836]]

 [[-0.01583708 -0.00182736  0.03959271]
  [-0.00182736 -0.00021085  0.00456839]
  [ 0.03959271  0.00456839 -0.09898177]]

 [[-0.00103524  0.00348343 -0.00194457]
  [ 0.00348343 -0.01172127  0.0065432 ]
  [-0.00194457  0.0065432  -0.00365263]]]
W:  [-0.36838785 -2.275689    0.01144757]


In [5]:
from jax import jacfwd, jacrev

def f(x):
    """Sum of the cubes of the entries of ``x``."""
    cubes = jnp.power(x, 3)
    return cubes.sum()
print(f(jnp.array([1.,2.,3.])))

def hessian(f):
    """Return a function computing the Hessian of ``f`` (forward-over-reverse)."""
    reverse_jacobian = jacrev(f)
    return jacfwd(reverse_jacobian)

def jacobian(f):
    """Return a function computing the forward-mode Jacobian of ``f``."""
    forward_jacobian = jacfwd(f)
    return forward_jacobian

# Evaluate both derivative operators at (1, 2, 3), then report shape and value.
J = jacobian(f)(jnp.array([1.,2.,3.]))
H = hessian(f)(jnp.array([1.,2.,3.]))

print("jacobian, with shape", J.shape)
print(J)

print("hessian, with shape", H.shape)
print(H)

36.0
jacobian, with shape (3,)
[ 3. 12. 27.]
hessian, with shape (3, 3)
[[ 6.  0.  0.]
 [ 0. 12.  0.]
 [ 0.  0. 18.]]


In [30]:
import numpy as np

from scipy.optimize import minimize

from scipy.optimize import Bounds

# Box limits on (x[0], x[1]): lower bounds first, upper bounds second.
bounds = Bounds(lb=[0, -0.5], ub=[1.0, 2.0])

def rosen(x):
    """The Rosenbrock function.

    Each consecutive pair of entries of ``x`` contributes
    ``100*(x[i+1] - x[i]**2)**2 + (1 - x[i])**2``; the terms are added up.
    """
    head, tail = x[:-1], x[1:]
    return sum(100.0*(tail-head**2.0)**2.0 + (1-head)**2.0)

def hessian(f):
    """Return a function computing the Hessian of ``f`` (forward-over-reverse)."""
    reverse_jacobian = jacrev(f)
    return jacfwd(reverse_jacobian)

def jacobian(f):
    """Return a function computing the forward-mode Jacobian of ``f``."""
    forward_jacobian = jacfwd(f)
    return forward_jacobian

# Gradient of the Rosenbrock objective via forward-mode autodiff.
rosen_der = jacfwd(rosen)


# Inequality constraints in the dict form passed to `minimize`:
# 'fun' returns the three constraint values, 'jac' their 3x2 Jacobian.
ineq_cons = dict(
    type='ineq',
    fun=lambda x: np.array([1 - x[0] - 2*x[1],
                            1 - x[0]**2 - x[1],
                            1 - x[0]**2 + x[1]]),
    jac=lambda x: np.array([[-1.0, -2.0],
                            [-2*x[0], -1.0],
                            [-2*x[0], 1.0]]),
)

def eqf(x):
    """Equality-constraint residual ``2*x[0] + x[1] - 1`` as a length-1 jax array."""
    residual = 2*x[0] + x[1] - 1
    return jnp.array([residual])
    
def jeq(x):
    """Jacobian of ``eqf`` at ``x``, converted to a NumPy array."""
    eq_jacobian = jacobian(eqf)
    return np.array(eq_jacobian(x))
    

# Equality constraint 2*x[0] + x[1] - 1 = 0 in the dict form passed to
# `minimize`; its Jacobian comes from autodiff through `jeq`.
eq_cons = dict(
    type='eq',
    fun=lambda x: np.array([2*x[0] + x[1] - 1]),
    jac=lambda x: np.array(jeq(x)),
)

# Starting point; print the equality-constraint Jacobian there as a sanity
# check, then solve the constrained problem with SLSQP.
x0 = np.array([0.5, 0])
print("1: ",jeq(x0))
res = minimize(
    rosen,
    x0,
    method='SLSQP',
    jac=rosen_der,
    constraints=[eq_cons, ineq_cons],
    options={'ftol': 1e-9, 'disp': True},
)

print(res)

1:  [[2. 1.]]
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.34271757499371747
            Iterations: 4
            Function evaluations: 5
            Gradient evaluations: 4
     fun: 0.34271757499371747
     jac: array([-0.82676458, -0.41372478])
 message: 'Optimization terminated successfully'
    nfev: 5
     nit: 4
    njev: 4
  status: 0
 success: True
       x: array([0.41494475, 0.1701105 ])


In [7]:

def rosen(x):
    """The Rosenbrock function.

    Each consecutive pair of entries of ``x`` contributes
    ``100*(x[i+1] - x[i]**2)**2 + (1 - x[i])**2``; the terms are added up.
    """
    head, tail = x[:-1], x[1:]
    return sum(100.0*(tail-head**2.0)**2.0 + (1-head)**2.0)

def hessian(f):
    """Return a function computing the Hessian of ``f`` (forward-over-reverse)."""
    reverse_jacobian = jacrev(f)
    return jacfwd(reverse_jacobian)

def jacobian(f):
    """Return a function computing the forward-mode Jacobian of ``f``."""
    forward_jacobian = jacfwd(f)
    return forward_jacobian

# Gradient of the Rosenbrock objective via forward-mode autodiff.
df = jacfwd(rosen)

def c(x):
    """Equality-constraint residual ``2*x[0] + x[1] - 1``."""
    first, second = x[0], x[1]
    return 2*first+second-1

# Derivative operators used by the Newton iteration:
A = jacfwd(c)               # gradient of the constraint c
C = jacfwd(jacrev(c))       # Hessian of the constraint c
W = jacfwd(jacrev(rosen))   # Hessian of the objective

# Starting point of the iteration.
x0 = np.array([0.5, 0])

def phi(x,lamda):
    """Merit value: squared 2-norm of ``df(x) - A(x)*lamda`` plus ``c(x)**2``."""
    stationarity = df(x)-A(x)*lamda
    feasibility = c(x)
    return np.linalg.norm(stationarity,ord=2)**2+feasibility**2
# beta   : factor in the line-search acceptance test (1 - beta*alpha)
# epsi   : the iteration stops once phi drops to this value or below
# xk     : current iterate, starting at x0
# lamdak : current multiplier estimate, starting at 1
beta, epsi = 0.8, 1e-9
xk, lamdak = x0, 1

def left(x,lamda):
    """Assemble the 3x3 coefficient matrix of the Newton system at ``(x, lamda)``.

    Layout: ``[[W - lamda*C, -A^T], [-A, 0]]``, with ``A(x)`` reshaped to a
    column for the upper-right block and to a row for the lower-left one.
    """
    neg_grad_c = -A(x)
    top_rows = np.hstack((W(x)-lamda*C(x), np.reshape(neg_grad_c,(2,1))))
    bottom_row = np.hstack((np.reshape(neg_grad_c,(1,2)), np.reshape([0],(1,1))))
    return np.vstack((top_rows, bottom_row))

def right(x,lamda):
    """Assemble the 3x1 right-hand side of the Newton system at ``(x, lamda)``.

    This is the negated stack of ``df(x) - lamda*A(x)`` (as a column) over
    ``-c(x)``.
    """
    stationarity = np.reshape(df(x),(2,1))-lamda*np.reshape(A(x),(2,1))
    feasibility = -np.reshape(c(x),(1,1))
    return -np.vstack((stationarity, feasibility))

# Damped Newton iteration on (x, lamda): solve left(...) d = right(...) for the
# step, then backtrack on alpha until the merit value phi has dropped by the
# factor (1 - beta*alpha).
#
# MAX_NEWTON_ITERS bounds the outer loop: if backtracking shrinks alpha until
# the trial point equals the current one, the iterate stops moving and the
# original `while phi(...) > epsi` would never terminate.
MAX_NEWTON_ITERS = 100

merit_k = phi(xk, lamdak)   # merit at the current iterate, evaluated once
newton_iters = 0
while merit_k > epsi:
    if newton_iters >= MAX_NEWTON_ITERS:
        raise RuntimeError(
            "Newton iteration did not reach phi <= epsi within "
            f"{MAX_NEWTON_ITERS} iterations (phi = {merit_k})"
        )
    newton_iters += 1

    # Full Newton step: the first two entries update x, the third updates lamda.
    dx = np.reshape(np.linalg.solve(left(xk, lamdak), right(xk, lamdak)), (3))
    step_x = np.array([dx[0], dx[1]])
    step_lamda = dx[2]

    # Backtracking line search: start from the full step, quarter it on failure.
    alpha = 1
    xkk = xk + alpha*step_x
    lamdakk = lamdak + alpha*step_lamda
    merit_trial = phi(xkk, lamdakk)
    while merit_trial > (1 - beta*alpha)*merit_k:
        alpha = alpha/4
        xkk = xk + alpha*step_x
        lamdakk = lamdak + alpha*step_lamda
        merit_trial = phi(xkk, lamdakk)

    # Accept the trial point; its merit value is reused for the next iteration.
    xk, lamdak, merit_k = xkk, lamdakk, merit_trial

print(xk, lamdak, merit_k)

[0.41494432 0.17011135] -0.41348410536118446 2.4584779439852858e-11


In [8]:
# Hessian and gradient operators of the current objective `f`.
# The cells shown here import `jax.numpy` (as `jnp`) and individual functions
# from `jax`, but never bind the name `jax` itself, so `jax.hessian(f)` raised
# NameError. `jacfwd(jacrev(f))` is the same forward-over-reverse Hessian built
# from names that are already imported.
W = jacfwd(jacrev(f))
g = jacfwd(f)

(1,)


In [None]:
def sqp_ker(f,eq,ineq,x0,epsi=1e-9,sigma=1,rho=0.8):
    """Unfinished SQP driver: so far it only builds derivative operators of ``f``.

    TODO: the iteration itself is not implemented yet. ``eq``, ``ineq``,
    ``x0``, ``epsi``, ``sigma`` and ``rho`` are accepted but not used, and the
    function returns ``None``.
    """
    # Forward-over-reverse Hessian, spelled with the imported `jacfwd`/`jacrev`
    # rather than `jax.hessian`: the cells shown here never bind the name
    # `jax`, so the original raised NameError when called.
    W=jacfwd(jacrev(f))
    g=jacfwd(f)
    


In [34]:
from scipy.optimize import Bounds

# Box limits on (x[0], x[1]): lower bounds first, upper bounds second.
bounds = Bounds(lb=[0, -0.5], ub=[1.0, 2.0])
def rosen(x):
    """The Rosenbrock function.

    Each consecutive pair of entries of ``x`` contributes
    ``100*(x[i+1] - x[i]**2)**2 + (1 - x[i])**2``; the terms are added up.
    """
    head, tail = x[:-1], x[1:]
    return sum(100.0*(tail-head**2.0)**2.0 + (1-head)**2.0)
def rosen_hess(x):
    """Closed-form Hessian of the Rosenbrock function at ``x``.

    Returns a square NumPy array: the first super- and sub-diagonals hold
    ``-400*x[:-1]`` and the main diagonal is filled in from the entries of ``x``.
    """
    x = np.asarray(x)

    # Main diagonal: first entry, last entry, then the interior entries.
    main_diag = np.zeros_like(x)
    main_diag[0] = 1200*x[0]**2-400*x[1]+2
    main_diag[-1] = 200
    main_diag[1:-1] = 202 + 1200*x[1:-1]**2 - 400*x[2:]

    # Off-diagonal couplings between neighbouring coordinates.
    off_diag_part = np.diag(-400*x[:-1],1) - np.diag(400*x[:-1],-1)

    return off_diag_part + np.diag(main_diag)

from scipy.optimize import LinearConstraint

# Linear constraints lb <= A @ x <= ub: the first row is bounded above only,
# the second row has lb == ub == 1.
linear_constraint = LinearConstraint(
    A=[[1, 2], [2, 1]],
    lb=[-np.inf, 1],
    ub=[1, 1],
)

def cons_f(x):
    """Values of the two nonlinear constraints, ``x[0]**2 + x[1]`` and ``x[0]**2 - x[1]``."""
    first_squared = x[0]**2
    return [first_squared + x[1], first_squared - x[1]]

def cons_J(x):
    """Jacobian of ``cons_f``: one row per constraint, one column per variable."""
    d_dx0 = 2*x[0]
    return [[d_dx0, 1], [d_dx0, -1]]

def cons_H(x, v):
    """Combination of the constraint Hessians weighted by ``v[0]`` and ``v[1]``.

    Both constraints share the constant Hessian ``[[2, 0], [0, 0]]``, so ``x``
    does not enter the result.
    """
    shared_hessian = np.array([[2, 0], [0, 0]])
    return v[0]*shared_hessian + v[1]*shared_hessian

from scipy.optimize import NonlinearConstraint

# Nonlinear constraints cons_f(x) <= 1 (no lower bound), with the analytic
# Jacobian and Hessian callbacks supplied.
nonlinear_constraint = NonlinearConstraint(
    fun=cons_f,
    lb=-np.inf,
    ub=1,
    jac=cons_J,
    hess=cons_H,
)

# Solve the same problem with trust-constr, supplying the autodiff gradient,
# the analytic Hessian and both constraint objects.
x0 = np.array([0.5, 0])

res = minimize(
    rosen,
    x0,
    method='trust-constr',
    jac=rosen_der,
    hess=rosen_hess,
    constraints=[linear_constraint, nonlinear_constraint],
    options={'verbose': 1},
)
print(res)


`gtol` termination condition is satisfied.
Number of iterations: 10, function evaluations: 7, CG iterations: 6, optimality: 5.27e-09, constraint violation: 0.00e+00, execution time: 0.063 s.
 barrier_parameter: 0.0008000000000000003
 barrier_tolerance: 0.0008000000000000003
          cg_niter: 6
      cg_stop_cond: 1
            constr: [array([0.75515031, 1.        ]), array([0.34228362, 0.00208321])]
       constr_nfev: [0, 7]
       constr_nhev: [0, 11]
       constr_njev: [0, 7]
    constr_penalty: 1.0
  constr_violation: 0.0
    execution_time: 0.06307411193847656
               fun: 0.34271759984762334
              grad: array([-0.8243288 , -0.41664243], dtype=float32)
               jac: [array([[1., 2.],
       [2., 1.]]), array([[ 0.8298998,  1.       ],
       [ 0.8298998, -1.       ]])]
   lagrangian_grad: array([-2.6501596e-09,  5.2692299e-09], dtype=float32)
           message: '`gtol` termination condition is satisfied.'
            method: 'tr_interior_point'
          