In [1]:
%load_ext autoreload
%autoreload 2

# %matplotlib inline
%matplotlib widget

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d

import cvxpy as cp
import torch
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter

from qpth.qp import QPFunction, QPSolvers

import warnings
warnings.filterwarnings('ignore')

import sys,os
sys.path.append(os.getcwd())

import BallPaddleSystem
import utils

In [2]:
system = BallPaddleSystem.BallPaddleSystem(dt=.05,u_max=2.)
ball_x0_min = np.array([0.25, -1.5])
ball_x0_max = np.array([1.75, 2.])
paddle_x0 = np.array([0.,0.])
ball_xg = np.array([1.,0.])
N = 20

### Generate data of the cost-to-go

In [3]:
dim_pos = 10
dim_vel = 10
ball_x0_pos = np.linspace(ball_x0_min[0], ball_x0_max[0], dim_pos)
ball_x0_vel = np.linspace(ball_x0_min[1], ball_x0_max[1], dim_vel)
BALL_POS, BALL_VEL = np.meshgrid(ball_x0_pos, ball_x0_vel)

In [None]:
BALL_VAL = np.zeros(BALL_POS.shape)
for i in range(BALL_POS.shape[0]):
    for j in range(BALL_POS.shape[1]):
        (prob,objective,constraints,var) = system.get_trajectory_miqp(paddle_x0,[BALL_POS[i,j],BALL_VEL[i,j]],ball_xg,N)
        prob.solve(solver=cp.CPLEX)
        val = objective.value
        if val:
            BALL_VAL[i,j] = val
        else:
            BALL_VAL[i,j] = None
            
        utils.update_progress((i*BALL_POS.shape[1]+j+1)/(BALL_POS.shape[0]*BALL_POS.shape[1]))

In [None]:
# save training data
np.savez('ball_paddle_values', BALL_VAL=BALL_VAL)

In [4]:
# OR load training data
data = np.load('ball_paddle_values.npz')
BALL_VAL = data['BALL_VAL']

In [5]:
# clean the data
np.nan_to_num(BALL_VAL,copy=False,nan=np.nanmax(BALL_VAL));

In [6]:
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(BALL_POS, BALL_VEL, BALL_VAL, rstride=1, cstride=1,
                cmap='plasma', edgecolor='none')
# ax.scatter(BALL_POS, BALL_VEL, BALL_VAL)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<mpl_toolkits.mplot3d.art3d.Poly3DCollection at 0x7f1de7aad940>

In [7]:
xy_data = np.vstack((np.reshape(BALL_POS,-1),np.reshape(BALL_VEL,-1))).T
z_label = np.expand_dims(np.reshape(BALL_VAL,-1),axis=1)

# shuffle it
num_data = xy_data.shape[0]
indx = np.arange(num_data)
np.random.shuffle(indx)

xy_data = xy_data[indx,:]
z_label = z_label[indx,:]

### Train a neural network to approximate it

In [8]:
learning_rate = 1e-3
batch_size = 50

nn_width = 12
model = torch.nn.Sequential(
    torch.nn.Linear(2, nn_width),
    torch.nn.ReLU(),
    torch.nn.Linear(nn_width, nn_width),
    torch.nn.ReLU(),
    torch.nn.Linear(nn_width, 1)
)
model.double()

loss_fn = torch.nn.MSELoss(reduction='sum')
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

writer = SummaryWriter()
n_iter = 0

In [None]:
# optional: move to GPU
model.cuda()

In [16]:
device = next(model.parameters()).device
xy_data_tensor = torch.from_numpy(xy_data).to(device)
z_label_tensor = torch.from_numpy(z_label).to(device)

num_epoch = 100

for epoch in range(num_epoch):
    batch_start = 0
    while batch_start < num_data:
        batch_end = min(num_data-1,batch_start+batch_size)
        x = xy_data_tensor[batch_start:batch_end,:]
        z = z_label_tensor[batch_start:batch_end,:]
        z_pred = model(x)
        loss = loss_fn(z_pred, z) / batch_size
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_start += batch_size
        n_iter += 1
    if epoch % 10 == 0:
        print("loss: %f" % loss.item())
#         writer.add_scalar('Loss/train', loss.item(), n_iter)

loss: 1.234993
loss: 1.216912
loss: 1.198645
loss: 1.181591
loss: 1.164702
loss: 1.148796
loss: 1.133222
loss: 1.117933
loss: 1.102833
loss: 1.087311


In [None]:
writer.close()

#### Find where the neural network over-approximates the most (becomes inadmissible)

In [17]:
(prob,objective,constraints,var) = system.get_adversarial_miqp(model,paddle_x0,ball_x0_min,ball_x0_max,ball_xg,N)
prob.solve(solver=cp.CPLEX)

worst_input = np.array(var['zb'].value[:,0])
worst_bi = np.array(var['bi'].value)
worst_v = np.array([v.value for v in var['v']])

print(objective.value)

-3.617129173895883


In [22]:
# check if the relaxed problem is far off
# (prob,objective,constraints,var) = system.get_adversarial_qp(model,paddle_x0,ball_x0_min,ball_x0_max,ball_xg,N)
(prob,objective,constraints,var) = system.get_adversarial_qp(model,paddle_x0,ball_x0_min,ball_x0_max,ball_xg,N,bi=worst_bi,v=worst_v)
prob.solve(verbose=False,solver=cp.CPLEX)

print(objective.value)

-3.6171291737248996


In [20]:
# and diff solver accurate
# prob = system.get_adversarial_qp_standard(model,paddle_x0,ball_x0_min,ball_x0_max,ball_xg,N)
prob = system.get_adversarial_qp_standard(model,paddle_x0,ball_x0_min,ball_x0_max,ball_xg,N,bi=worst_bi,v=worst_v)
qp_fun = QPFunction(verbose=False,solver=QPSolvers.CVXPY,check_Q_spd=True)
x_adv = qp_fun(prob.Q + 1e-6*torch.eye(prob.num_vars).type(torch.DoubleTensor).to(device), prob.q, prob.G, prob.h, prob.A, prob.b)
r = prob.eval_obj(x_adv)

print(r.item())

z = cp.Variable(prob.Q.shape[0])
obj = cp.Minimize(.5 * cp.quad_form(z, prob.Q.detach().numpy()) + prob.q.detach().numpy() * z)
con = [prob.G.detach().numpy()*z <= prob.h.detach().numpy(), prob.A.detach().numpy()*z == prob.b.detach().numpy()]
cp_prob = cp.Problem(obj,con)
cp_prob.solve(solver=cp.CPLEX,verbose=False)

-3.617128977456618


-3.617129173859638

### Retrain the network but with admissibility regularization and check again

In [63]:
learning_rate = 1e-3
batch_size = 50

nn_width = 12
admissible_model = torch.nn.Sequential(
    torch.nn.Linear(2, nn_width),
    torch.nn.ReLU(),
    torch.nn.Linear(nn_width, nn_width),
    torch.nn.ReLU(),
    torch.nn.Linear(nn_width, 1)
)
admissible_model.double()

loss_fn = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(admissible_model.parameters(), lr=learning_rate)
# optimizer = torch.optim.Adam(admissible_model.parameters(), lr=learning_rate)

# writer = SummaryWriter()
n_iter = 0

In [None]:
# optional: move to GPU
admissible_model.cuda()

In [70]:
device = next(admissible_model.parameters()).device
xy_data_tensor = torch.from_numpy(xy_data).to(device)
z_label_tensor = torch.from_numpy(z_label).to(device)

num_epoch = 100

qp_fun = QPFunction(verbose=False,solver=QPSolvers.CVXPY,check_Q_spd=True)

regularize = True

for epoch in range(num_epoch):
    batch_start = 0
    
    while batch_start < num_data:
        batch_end = min(num_data-1,batch_start+batch_size)
        x = xy_data_tensor[batch_start:batch_end,:]
        z = z_label_tensor[batch_start:batch_end,:]
        z_pred = admissible_model(x)
        
        if regularize:
            prob = system.get_adversarial_qp_standard(admissible_model,paddle_x0,ball_x0_min,ball_x0_max,ball_xg,N)
            QI = 1e-6*torch.eye(prob.num_vars).to(device).type(prob.dtype)
            x_adv = qp_fun(prob.Q + QI, prob.q, prob.G, prob.h, prob.A, prob.b)
            r = prob.eval_obj(x_adv)
            reg_loss = F.relu(-r)
        else:
            reg_loss = 0.
        
        fit_loss = loss_fn(z_pred, z) / batch_size 
    
        loss = fit_loss + reg_loss
    
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
        
        batch_start += batch_size
    
    if epoch % 10 == 0:
        if regularize:
            print("reg: %f" % reg_loss.item())
        print("fit: %f" % fit_loss.item())
        print("loss: %f" % loss.item())
#         writer.add_scalar('Admissible/train', loss.item(), n_iter)

reg: 25.117094
fit: 0.900096
loss: 26.017190
reg: 23.110666
fit: 1.749394
loss: 24.860060
reg: 22.715418
fit: 1.888962
loss: 24.604379
reg: 22.425434
fit: 2.019556
loss: 24.444990
reg: 22.217964
fit: 2.117336
loss: 24.335300
reg: 22.047666
fit: 2.199488
loss: 24.247155
reg: 21.903345
fit: 2.268819
loss: 24.172164
reg: 21.776138
fit: 2.332706
loss: 24.108845
reg: 21.665665
fit: 2.391284
loss: 24.056949
reg: 21.563360
fit: 2.451991
loss: 24.015351


In [71]:
(prob,objective,constraints,var) = system.get_adversarial_miqp(admissible_model,paddle_x0,ball_x0_min,ball_x0_max,ball_xg,N)
prob.solve(solver=cp.CPLEX)

worst_input_admiss = np.array(var['zb'].value[:,0])

print(objective.value)

-2.255926770690712


In [42]:
(prob,objective,constraints,var) = system.get_adversarial_qp(admissible_model,paddle_x0,ball_x0_min,ball_x0_max,ball_xg,N)
prob.solve()

print(objective.value)

-21.318098218923428


### Trying with adversarial activations

In [77]:
# trying the activation-based approach

learning_rate = 1e-3
batch_size = 50

nn_width = 12
admissible_model = torch.nn.Sequential(
    torch.nn.Linear(2, nn_width),
    torch.nn.ReLU(),
    torch.nn.Linear(nn_width, nn_width),
    torch.nn.ReLU(),
    torch.nn.Linear(nn_width, 1)
)
admissible_model.double()

loss_fn = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(admissible_model.parameters(), lr=learning_rate)
# optimizer = torch.optim.Adam(admissible_model.parameters(), lr=learning_rate)

In [None]:
# optional: move to GPU
admissible_model.cuda()

In [86]:
device = next(admissible_model.parameters()).device
xy_data_tensor = torch.from_numpy(xy_data).to(device)
z_label_tensor = torch.from_numpy(z_label).to(device)

num_epoch = 100

qp_fun = QPFunction(verbose=False,solver=QPSolvers.CVXPY,check_Q_spd=True)

regularize = True

for epoch in range(num_epoch):
    batch_start = 0
    
    while batch_start < num_data:
        batch_end = min(num_data-1,batch_start+batch_size)
        x = xy_data_tensor[batch_start:batch_end,:]
        z = z_label_tensor[batch_start:batch_end,:]
        z_pred = admissible_model(x)
        fit_loss = loss_fn(z_pred, z) / batch_size
        
        if regularize:
            # find an adverserial example
            with torch.no_grad():
                (prob,objective,constraints,var) = system.get_adversarial_miqp(admissible_model,paddle_x0,ball_x0_min,ball_x0_max,ball_xg,N)
                prob.solve(solver=cp.CPLEX)
                bi = np.array(var['bi'].value)
                v = [np.array(v.value) for v in var['v']]
                #print("adv: %f" % objective.value)
            prob = system.get_adversarial_qp_standard(admissible_model,paddle_x0,ball_x0_min,ball_x0_max,ball_xg,N,bi=bi,v=v)    
            QI = 1e-6*torch.eye(prob.num_vars).to(device).type(prob.dtype)
            x_adv = qp_fun(prob.Q + QI, prob.q, prob.G, prob.h, prob.A, prob.b)
            r = prob.eval_obj(x_adv)
            #print("relaxed adv: %f" % r.item())
            reg_loss = F.relu(-r) * 10.
        else:
            reg_loss = 0.
                
        loss = fit_loss + reg_loss
    
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
        
        batch_start += batch_size
    
    if epoch % 10 == 0:
        if regularize:
            print("reg: %f" % reg_loss.item())
        print("fit: %f" % fit_loss.item())
        print("loss: %f" % loss.item())

reg: 11.463003
fit: 3.556810
loss: 15.019813
reg: 0.000000
fit: 8.717121
loss: 8.717121
reg: 0.000000
fit: 8.569960
loss: 8.569960
reg: 0.000000
fit: 8.703406
loss: 8.703406
reg: 0.000000
fit: 9.499193
loss: 9.499193

KeyboardInterrupt


KeyboardInterrupt: 

In [87]:
(prob,objective,constraints,var) = system.get_adversarial_miqp(admissible_model,paddle_x0,ball_x0_min,ball_x0_max,ball_xg,N)
prob.solve(solver=cp.CPLEX)

worst_input_admiss = np.array(var['zb'].value[:,0])

print(objective.value)

0.21064446426117556


In [88]:
(prob,objective,constraints,var) = system.get_adversarial_qp(admissible_model,paddle_x0,ball_x0_min,ball_x0_max,ball_xg,N)
prob.solve()

print(objective.value)

-20.195069236400055


### Plotting the samples and the approximated cost-to-go

In [89]:
print("=== NOT ADMISSIBLE ===")

print("Neural network")
print(worst_input)
with torch.no_grad():
    worst_value = model(torch.from_numpy(worst_input).to(device)).cpu().numpy()
print(worst_value)

print("Optimization problem")
print(worst_input)
(prob,objective,constraints,var) = system.get_trajectory_miqp(paddle_x0,worst_input,ball_xg,N)
prob.solve(solver=cp.CPLEX)
worst_opt_val = objective.value
print(worst_opt_val)

print("Optimization of closest on grid")
worst_input_grid = [utils.find_nearest(ball_x0_pos,worst_input[0]),utils.find_nearest(ball_x0_vel,worst_input[1])]
print(worst_input_grid)
(prob,objective,constraints,var) = system.get_trajectory_miqp(paddle_x0,worst_input_grid,ball_xg,N)
prob.solve(solver=cp.CPLEX)
worst_opt_val_grid = objective.value
print(worst_opt_val_grid)

print("Neural network of closest on grid")
print(worst_input_grid)
with torch.no_grad():
    worst_value_grid = model(torch.from_numpy(np.array(worst_input_grid)).to(device)).cpu().numpy()
print(worst_value_grid)

print("\n=== ADMISSIBLE ===")

print("Admissible neural network (same input as nonadmissible)")
print(worst_input)
with torch.no_grad():
    worst_value_same = admissible_model(torch.from_numpy(worst_input).to(device)).cpu().numpy()
print(worst_value_same)

print("Admissible neural network (worst for this network)")
print(worst_input_admiss)
with torch.no_grad():
    worst_value_admiss = admissible_model(torch.from_numpy(worst_input_admiss).to(device)).cpu().numpy()
print(worst_value_admiss)

print("Optimization problem")
print(worst_input_admiss)
(prob,objective,constraints,var) = system.get_trajectory_miqp(paddle_x0,worst_input_admiss,ball_xg,N)
prob.solve(solver=cp.CPLEX)
worst_opt_val_admiss = objective.value
print(worst_opt_val_admiss)

print("Optimization of closest on grid")
worst_input_grid_admiss = [utils.find_nearest(ball_x0_pos,worst_input_admiss[0]),utils.find_nearest(ball_x0_vel,worst_input_admiss[1])]
print(worst_input_grid_admiss)
(prob,objective,constraints,var) = system.get_trajectory_miqp(paddle_x0,worst_input_grid_admiss,ball_xg,N)
prob.solve(solver=cp.CPLEX)
worst_opt_val_admiss_grid = objective.value
print(worst_opt_val_admiss_grid)

print("Neural network of closest on grid")
print(worst_input_grid_admiss)
with torch.no_grad():
    worst_value_admiss_grid = admissible_model(torch.from_numpy(np.array(worst_input_grid_admiss)).to(device)).cpu().numpy()
print(worst_value_admiss_grid)

=== NOT ADMISSIBLE ===
Neural network
[1.2119568  0.83773706]
[14.13808438]
Optimization problem
[1.2119568  0.83773706]
10.520955209006617
Optimization of closest on grid
[1.25, 0.8333333333333335]
15.371058331511865
Neural network of closest on grid
[1.25, 0.8333333333333335]
[14.55752815]

=== ADMISSIBLE ===
Admissible neural network (same input as nonadmissible)
[1.2119568  0.83773706]
[9.71301762]
Admissible neural network (worst for this network)
[ 0.4962 -1.5   ]
[2.53654325]
Optimization problem
[ 0.4962 -1.5   ]
2.7471877175681088
Optimization of closest on grid
[0.41666666666666663, -1.5]
2.930658841307967
Neural network of closest on grid
[0.41666666666666663, -1.5]
[2.17692674]


In [90]:
with torch.no_grad():
    z_pred = model(torch.from_numpy(xy_data).to(device)).cpu().numpy()
    z_pred_admiss = admissible_model(torch.from_numpy(xy_data).to(device)).cpu().numpy()
    
# unshuffle
z_pred = z_pred[[np.argwhere(indx == i)[0,0] for i in np.arange(num_data)]]
Z_pred = np.reshape(z_pred,BALL_POS.shape)

z_pred_admiss = z_pred_admiss[[np.argwhere(indx == i)[0,0] for i in np.arange(num_data)]]
Z_pred_admiss = np.reshape(z_pred_admiss,BALL_POS.shape)

In [91]:
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(BALL_POS, BALL_VEL, BALL_VAL, rstride=1, cstride=1,
                cmap='plasma', edgecolor='none')
ax.plot_surface(BALL_POS, BALL_VEL, Z_pred, rstride=1, cstride=1,
                cmap='Greys', edgecolor='none')
ax.plot_surface(BALL_POS, BALL_VEL, Z_pred_admiss, rstride=1, cstride=1,
                cmap='viridis', edgecolor='none')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<mpl_toolkits.mplot3d.art3d.Poly3DCollection at 0x7f1cf037ef28>

In [92]:
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(BALL_POS, BALL_VEL, np.minimum(BALL_VAL - Z_pred_admiss,0.), rstride=1, cstride=1,
                cmap='Reds', edgecolor='none')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<mpl_toolkits.mplot3d.art3d.Poly3DCollection at 0x7f1cf07464e0>