## CVXPyLayers - DAG w LP + L2 Normalization

In [1]:
from cvxpylayers.torch import CvxpyLayer
import torch
import cvxpy as cp

In [2]:
# Define convex optimization model
num_n = 5
num_e = 5
e_out = [
    [0, 1],
    [2, 3],
    [4],
    [],
    [],
]
e_in = [
    [],
    [0],
    [1],
    [2, 4],
    [3],
]

n = cp.Variable(num_n)
e = cp.Variable(num_e)
s_in = cp.Parameter(1)
s_out = cp.Variable(2)
e_hat = cp.Parameter(num_e)

R = 2.0
# objective = cp.norm2(e-e_hat)
objective = -1*e_hat.T@e + R*cp.norm2(e)
bound_constraints = [e >= 0, e <= 1]
flow_constraints = [
    s_in[0] - e[0] - e[1] == 0,
    e[0] - e[2] - e[3] == 0,
    e[1] - e[4] == 0,
    e[2] + e[4] + s_out[0] == 0,
    e[3] + s_out[1] == 0,
    
]
source_sink_constraints = [
    s_in[0] + s_out[0] + s_out[1] == 0,
]
constraints = bound_constraints + flow_constraints + source_sink_constraints

prob = cp.Problem(objective=cp.Minimize(objective), constraints=constraints)
dag_proj_layer = CvxpyLayer(problem=prob, parameters=[e_hat, s_in], variables=[e, s_out])

In [3]:
# layer(torch.Tensor([1.0]), torch.Tensor([1.0, 0.5]))
path_true = torch.Tensor([1.0, 0.0, 0.0, 1.0, 0.0])
e_arg = torch.Tensor([1.5, 1.0, 1.0, 0.0, 1.0])
e_arg.requires_grad = True
s_in_arg = torch.tensor([1.0])
e_res, s_res = dag_proj_layer(e_arg, s_in_arg)
loss = torch.norm(e_res - path_true)
loss.backward()
e_res

tensor([0.5714, 0.4286, 0.5332, 0.0383, 0.4286],
       grad_fn=<_CvxpyLayerFnFnBackward>)

In [4]:
dag_proj_layer(torch.Tensor([e_arg.tolist(), e_arg.tolist()]), torch.Tensor([[1.0], [2.0]]))

(tensor([[5.7143e-01, 4.2857e-01, 5.3315e-01, 3.8278e-02, 4.2857e-01],
         [1.0000e+00, 1.0000e+00, 1.0000e+00, 2.5964e-10, 1.0000e+00]]),
 tensor([[-9.6172e-01, -3.8277e-02],
         [-2.0000e+00, -2.5143e-10]]))

In [5]:
dtype = torch.double
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = "cpu"

kwargs = {"dtype": dtype, "device": device}

DIM = 5

# We initialize a random tensor
x = torch.rand([128, DIM]).to(**kwargs)
print(x.shape)
source = torch.ones(x.shape[0], 1).to(**kwargs)
print(source.shape)

torch.Size([128, 5])
torch.Size([128, 1])


In [6]:
# Turn its grad on, since we will change this tensor to minimize our loss
x.requires_grad = True
y_true = torch.Tensor([1.0, 0.0, 0.0, 1.0, 0.0]).to(**kwargs)

print(y_true)

# Initialize an SGD optimizer and do 200 steps
optim = torch.optim.Adam([x], 1.0)
# optim = torch.optim.LBFGS([x], lr=1.0)


tensor([1., 0., 0., 1., 0.], dtype=torch.float64)


In [7]:
%%time 
import time
BASE = 1

fwd = 0
bwd = 0

dag_proj, _ = dag_proj_layer(x, source)
loss = torch.norm(dag_proj - y_true, dim=-1).mean()
#         print(perturbed_x)
#         print(dag_proj[:10,:7])
print(torch.abs(dag_proj - y_true).sum().item())
print(loss.item(), torch.sum(x, dim=-1).mean().item())

for iteration in range(10*BASE+1):
    
    def closure():
        optim.zero_grad()
        start = time.time()
        dag_proj, _ = dag_proj_layer(x, source)
        end = time.time()
#         fwd += end - start
        loss = torch.norm(dag_proj - y_true, dim=-1).mean()
    #     + torch.norm(dag_proj - x)
    #     + torch.maximum(
    #         torch.norm(x, dim=-1) - torch.Tensor([6]).to(**kwargs), torch.Tensor([0]).to(**kwargs)
    #     ).mean()
        start = time.time()
        loss.backward()
        end = time.time()
#         bwd += end - start  
        return loss
    optim.step(closure)
    if iteration % BASE== 0:
        dag_proj, _ = dag_proj_layer(x, source)
        loss = torch.norm(dag_proj - y_true, dim=-1).mean()
#         print(perturbed_x)
#         print(dag_proj[:10,:7])
        print(torch.abs(dag_proj - y_true).sum().item())
        print(loss.item(), torch.sum(x, dim=-1).mean().item())
        
print("SGD has run???")
print("TIMES: ", fwd, bwd)

294.5468563408916
1.084792829747162 2.4574028216302395
2.130859157107358
0.010886139049883291 1.4729430384787325
3.3741665058101656e-05
1.4026939905216687e-07 0.8048956090289476
0.000735745340106364
2.8467565811515646e-06 0.2884954141650119
0.0010323381148076647
4.017177457085587e-06 -0.13450568658221435
0.0009104161244135732
3.64385545052734e-06 -0.49203552447066495
0.000850466354221736
3.618872125443003e-06 -0.8001781947872023
0.0010324171423726498
4.386144375576987e-06 -1.0692795110388014
0.001131951184123499
4.822086260605543e-06 -1.3065075481068045
0.0011340416491941153
4.781860065437125e-06 -1.517102126857707
0.0008468956377049555
3.5563269355091067e-06 -1.7050516102331743
0.0010447172125133035
4.292194414994458e-06 -1.8734887336175303
SGD has run???
TIMES:  0 0
Wall time: 11.1 s


In [8]:
dag_proj_layer(x, source)

(tensor([[ 1.0000e+00, -1.2280e-06,  1.0620e-05,  1.0000e+00, -5.9650e-06],
         [ 1.0000e+00,  2.4681e-07,  1.7231e-06,  1.0000e+00, -1.0798e-06],
         [ 1.0000e+00,  2.7237e-11,  5.3234e-12,  1.0000e+00, -3.6284e-11],
         [ 1.0000e+00, -2.7519e-06,  3.4079e-06,  1.0000e+00, -3.6004e-06],
         [ 1.0000e+00, -3.4142e-11,  2.1269e-12,  1.0000e+00,  6.5115e-12],
         [ 1.0000e+00,  5.3991e-06, -6.8064e-06,  1.0000e+00,  3.1478e-06],
         [ 1.0000e+00, -4.1274e-12, -3.6119e-11,  1.0000e+00,  3.3171e-11],
         [ 1.0000e+00, -1.6937e-11,  3.5732e-11,  1.0000e+00, -3.2143e-11],
         [ 1.0000e+00,  1.5951e-11, -6.7720e-13,  1.0000e+00,  1.1795e-11],
         [ 1.0000e+00, -3.3544e-11,  1.1154e-11,  1.0000e+00, -6.3966e-12],
         [ 1.0000e+00,  5.1234e-07,  8.7351e-06,  1.0000e+00, -5.3234e-06],
         [ 1.0000e+00,  1.2884e-12, -1.2046e-11,  1.0000e+00,  5.2730e-12],
         [ 1.0000e+00,  2.8547e-07,  7.3596e-06,  1.0000e+00, -4.0361e-06],
         [ 1

In [9]:
torch.round(dag_proj).sum(dim=0)

tensor([128.,   0.,   0., 128.,   0.], dtype=torch.float64,
       grad_fn=<SumBackward1>)