# Fold-opt Layer with PGD
1. Define PGD update step with analytical projection
2. Define solver `solve_closed_form`

In [1]:
import torch
from torch import nn
import math
import cvxpy as cp

In [2]:
import sys

sys.path.insert(0, 'E:\\User\\Stevens\\MyRepo\\FDFL\\helper')
# sys.path.insert(0, 'E:\\User\\Stevens\\Code\\Fold-opt\\fold_opt')
from myutil import *
from GMRES import *
from fold_opt import *

Auto-Sklearn cannot be imported.


# PGD

 -------------------------------------------------------------
 1.  Projection onto ${d \geq 0 , c^ {\top} d \leq Q}$
 
     $\Pi(d) = [d - λ^* \cdot c]_+$  with
                $λ^* \geq 0$ chosen s.t. $c^{\top}\Pi(d)=Q$   (if budget violated)
 -------------------------------------------------------------


In [None]:
def proj_budget(x, cost, Q, tol=1e-9, max_iter=60):
    """Works on (n,) or batched (B,n) tensors"""
    cost = cost.to(x)
    Q    = torch.as_tensor(Q, dtype=x.dtype, device=x.device)

    def _project(vec):
        v = torch.clamp(vec, min=0.0)
        if (v @ cost) <= Q:                      # already feasible
            return v
        lam_lo, lam_hi = 0.0, torch.max(v / cost).item()
        for _ in range(max_iter):
            lam_mid = 0.5 * (lam_lo + lam_hi)
            vv = torch.clamp(v - lam_mid * cost, min=0.0)
            (lam_lo, lam_hi) = (lam_mid, lam_hi) if (vv @ cost) > Q else (lam_lo, lam_mid)
        return torch.clamp(v - lam_hi * cost, min=0.0)

    if x.dim() == 1:
        return _project(x)
    return torch.stack([_project(v) for v in x])           # batched

In [None]:
(n, )
(n, m)

(k=100, n=5000 ,m=1)

(k, n, m = 10)

(k,n,)

(k,n,m=1)

risk.shape[1] if len(shape) >1 else 1

(n,)
(n,1)
(k, n)
(n,)
(n, 1)
(n, m)

for mi in m:
    pass
[[1, 2], [3, 4], [5, 6]]
[1,2,3,4,5,6]
[[1,2,3],[4,5,6]]

-------------------------------------------------------------
 One PGD update that *is differentiable* wrt both r & d 
 
-------------------------------------------------------------

In [4]:
def alpha_fair_torch(u, alpha):
    if   alpha == 1:   return torch.sum(torch.log(u), dim=-1)
    elif alpha == 0:   return torch.sum(u, dim=-1)
    elif alpha == 'inf': return torch.min(u, dim=-1).values
    return torch.sum(u.pow(1-alpha)/(1-alpha), dim=-1)

def pgd_step(r, d, g, cost, Q, alpha, lr):
    d.requires_grad_(True)
    obj = alpha_fair_torch(d * r * g, alpha).sum()
    grad_d, = torch.autograd.grad(obj, d, create_graph=True)
    return proj_budget(d + lr * grad_d, cost, Q)

In [5]:
def closed_form_solver_torch(r, g, cost, alpha, Q):
    """Wraps `solve_closed_form` so it works on torch + batches."""
    single_input = r.dim() == 1
    if single_input:
        r_np   = r.detach().cpu()
        d_np, _ = solve_closed_form(g.cpu(), r_np, cost.cpu(), alpha, Q)
        return torch.as_tensor(d_np, dtype=r.dtype, device=r.device)
    # batched
    outs = []
    for r_i in r:
        d_i, _ = solve_closed_form(g.cpu(), r_i.cpu(), cost.cpu(), alpha, Q)
        outs.append(torch.as_tensor(d_i, dtype=r.dtype, device=r.device))
    return torch.stack(outs)

 ------------------------------------------------------------
 4.  Fold‑Opt layer
 ------------------------------------------------------------

In [None]:
def make_foldopt_layer(g, cost, alpha, Q,
                       lr=1e-2, n_fixedpt=50, rule='GMRES'):
    # -------- non‑differentiable solver ---------------
    def solver_fn(r):
        return closed_form_solver_torch(r, g, cost, alpha, Q)

    # -------- one differentiable update g(c,x) -------------
    def update_fn(r, x_star, *_):          # extra *args ignored by Fold‑Opt
        g_batched  = g.expand_as(r) if g.dim()==1 and r.dim()==2 else g
        return pgd_step(r, x_star, g_batched, cost, Q, alpha, lr)

    return FoldOptLayer(solver_fn, update_fn,
                        n_iter=n_fixedpt, backprop_rule=rule)

In [None]:
if __name__ == "__main__":
    torch.manual_seed(0)

    r     = torch.tensor([1.,2.,3.,4.,5.], requires_grad=True)
    g     = torch.ones_like(r)
    cost  = torch.ones_like(r)
    alpha = 2
    Q     = 2.0

    layer = make_foldopt_layer(g, cost, alpha, Q, lr=1e-2, n_fixedpt=40)

    d_star = layer(r)
    d_star.sum().backward() 

    d_cf, _  = solve_closed_form(g, r.detach(), cost, alpha, Q)    # numpy
    J_cf     = compute_gradient_closed_form(g, r.detach(), cost, alpha, Q)  # numpy

    print("‖d_foldopt - d_closed‖∞ :", (d_star - torch.tensor(d_cf)).abs().max().item())
    print("Fold-Opt Jacobian shape :", r.grad.shape)          # (n,) ← want (n,) grad test

RuntimeError: Mismatch in shape: grad_output[0] has a shape of torch.Size([5, 1]) and output[0] has a shape of torch.Size([5]).

In [18]:
def my_solver(c):
    return torch.clamp(c, min=0.0)

def my_update_step(c, x):
    alpha = 0.1
    grad  = x - c
    x_new = torch.clamp(x - alpha*grad, min=0.0)
    return x_new

fold_layer = FoldOptLayer(
                solver      = my_solver,
                update_step = my_update_step,
                n_iter      = 20,
                backprop_rule='FPI')

c      = torch.tensor([[1.0], [ 2.0]], requires_grad=True)   # (B=2, n=1)
target = torch.tensor([[ 1], [ 1]])

x_star = fold_layer(c)
print("x* from FoldOptLayer:", x_star.squeeze().tolist())     # → [0.0, 2.0]

loss = 0.5 * torch.sum((x_star - target) ** 2)
loss.backward()

print("Grad wrt c:", c.grad.squeeze().tolist())

x* from FoldOptLayer: [1.0, 2.0]
Grad wrt c: [0.0, 0.8893653750419617]


In [5]:
lr = 0.2
max_iter = 500
tol = 1e-8
dtype = torch.double
device = "cpu"