$\textbf{GOAL :}$ Get a certificate telling the "quality" of our local minimum.

Inspired from Section 5 of my survey.

Steps,
- Minimize as already did the primal problem. 
- Find the associated point in the dual
- Compute the dual function at this point : **hard**, we can get only a bound on the certificate because we need to compute a infinimum over non-convex fonction.
- Compute the gap
- Determine the quality of the local minimum 

In [4]:
using LinearAlgebra, Statistics, Random, Printf, Plots

In [5]:
rho(x, ν)  = abs(x) / (1 + abs(x)^(1-ν))
rho_prime(x, ν) = (1 + ν*abs(x)^(1-ν)) / (1 + abs(x)^(1-ν))^2 

rho_prime (generic function with 1 method)

# Setup ($\lambda_{QUT}$ and ISTA)

In [6]:
include("../functions/ISTA.jl")
include("../functions/metrics.jl")

import .ISTA: ista_L, ista
import .metrics: pesr, tpr, fdr, f1score

In [7]:
function bisection(f, a, b; tol=1e-10, maxiter=1000, verbose=false)
    if f(a)==0
        return a, 0
    end
    if f(b)==0
        return b, 0
    end
    
    if verbose
        @printf("start with a = %.12f : f(a) = %.12f, b = %.12f : f(b) = %.12f\n", a, f(a), b, f(b))
    end
    fa, fb = f(a), f(b)
    # TODO : attention ! special pour ce cas et se rappeler pourquoi
    while f(a)*f(b)>0
        b*=2
        if b> 1e10
            return a, -1
        end
        
    end
    # @assert fa * fb < 0 "Need f(a)f(b)<0 : $(fa) | $(fb)"
    for k in 1:maxiter
        m  = 0.5*(a+b)
        fm = f(m)
        if abs(fm) <= tol || 0.5*(b-a) <= tol
            if verbose
                @printf("bisection: found %.12f in %d iterations | a = %.12f, b = %.12f, f(a) = %.12f, f(b) = %.12f\n", m, k, a, b, fa, fb)
            end         
            return m, k
        elseif fm>0
            b, fb = m, fm
        else
            a, fa = m, fm
        end
    end
    error("No convergence in $maxiter iterations")
end

bisection (generic function with 1 method)

In [8]:
function kappa_phi(Γ::Float64, ν::Float64)
    T = eps(Float64)
    kappa_poly(κ) = κ^(2-ν) + 2κ + κ^ν + 2Γ*(ν-1) 
    κ, it = bisection(kappa_poly, 0.0, max(1.0, Γ*(1-ν)), verbose=false)
    φ = κ/2 +  Γ/(1 + κ^(1-ν))

    return κ, φ
end

kappa_phi (generic function with 1 method)

In [9]:
function x_tar(z::Float64, Γ::Float64, ν::Float64, κ::Float64)
    h(x) = x + Γ * rho_prime(x, ν) - z
    res, _ = bisection(h,κ,z; tol = 1e-10, verbose=false)
    return res
end

x_tar (generic function with 1 method)

In [10]:
function prox_harder_vec(z::AbstractVector, γ::Float64, λ::Float64, ν::Float64)
    if ν== 1.0 
        return sign.(z) .* max.(abs.(z) .- γ * λ*0.5, 0.0) # soft thresholding (see l_qut_1) # TODO page 4 harderLASSO pdf phi_1 (confirmer)
    end
    # else proximal operator for harder penalty
    Γ = γ * λ 
    κ, φ = kappa_phi(Γ, ν)    

    β_new = map(z[1:end-1]) do zi # coordinate loop
        abs(zi) ≤ φ - eps() ? 0.0 : sign(zi) * x_tar(abs(zi), Γ, ν, κ) 
    end
    return vcat(β_new, z[end])
end


prox_harder_vec (generic function with 1 method)

In [11]:
function lambda_local0(y, X; verbose=false)
    r = y .- mean(y)
    if verbose
        @printf("residu norm : %.12f\n", norm(r))
    end
    return maximum(abs.(X' * r)) / norm(r)
end

function lambda_qut_v(X; α=0.05, M=1000, rng=Random.GLOBAL_RNG, verbose=false)
    n = size(X,1)
    λvals = Vector{Float64}(undef, M)
    for m in 1:M
        y0 = randn(rng, n)
        λvals[m] = lambda_local0(y0, X; verbose=verbose)
    end
    return quantile!(λvals, 1-α)
end

lambda_qut_v (generic function with 1 method)

# On a single sample

## Get the primal value

We don't handle intercept for the start to simplify a little bit : it's enough hard like this.

In [24]:
n, p = 70, 250
B_qut = 1000
sigma = 0.1
ν = 0.1
s = 0
;

In [25]:
mu = 1. # mean of the noise
c = 0.0 # intercept
tol = 1e-9

X = randn(n, p)
X .-= mean(X; dims=1)
X ./= std(X;  dims=1)
# X̃ = hcat(X, ones(n)) # we have intercept (θ₂) so it corresponds to 1

y = sigma*mu* randn(n) .+ c
λ = lambda_qut_v(X; α=0.05, M=B_qut, rng=Random.GLOBAL_RNG, verbose=false)
       
# f(θ) = norm(y - X̃*θ,2)
# ∇f(θ) =X̃'*(X̃*θ - y) ./ norm(X̃*θ - y,2)
# penalty(θ) = λ * sum(rho.(θ[1:end-1], ν)) # penalty on β only

f(θ) = norm(y - X*θ,2)
∇f(θ) =X'*(X*θ - y) ./ norm(X*θ - y,2)
penalty(θ) = λ * sum(rho.(θ, ν)) # penalty on β only

L0 = 0.001

# θ_hat = ista(zeros(p+1), f, penalty, ∇f, L0, (z, γ) -> prox_harder_vec(z, γ, λ, ν); max_iter=10_000, tol=tol, verbose=true)
θ_hat = ista(zeros(p), f, penalty, ∇f, L0, (z, γ) -> prox_harder_vec(z, γ, λ, ν); max_iter=10_000, tol=tol, verbose=false)
;

## Associated dual point

<span style="color:red"><b>TODO : Verifier !</b></span>

We want find the dual point in $\Phi_{lsc}$ defined by $a\geq 0$, $u \in \mathbb{R}^n$ and $c\in \mathbb{R}$

$$\psi_{a,u,c} : y \mapsto -a\|y\|_{2}^{2}+\langle u,y\rangle+c ,
\qquad  
a\ge 0,\;u\in\mathbb R^{n},\;c\in\mathbb R,
$$

We denote, 
$$
r:=X\theta_{\text{pred}}-z_{\text{pred}}
   \quad\Longrightarrow\quad
   \|r\|_{2}\le\varepsilon .
$$

$$
\nabla f\bigl(z_{\text{pred}}\bigr)=
\frac{z_{\text{pred}}-y_{\text{obs}}}{\|y_{\text{obs}}-z_{\text{pred}}\|_{2}}
=:g .
$$

The Lagrangian of the relaxed problem is
$L(\theta,z,a)=f(z)+g(\theta)+a(\|X\theta-z\|_{2}^{2}-\varepsilon)$.
Stationarity in $z$ yields

$$
0=\nabla_{z}L
  =g-2a\,r
  \quad\Longrightarrow\quad
  u:=g-2a\,r
$$

because $u$ is precisely the linear part of the quadratic $\psi_{a,u,c}$.

Take the dot-product of the previous relation with $r$,

$$
0=\langle g,r\rangle-2a\|r\|_{2}^{2}
\quad\Longrightarrow\quad
a=\frac{\langle g,r\rangle}{2\|r\|_{2}^{2}}
\;\;(\ge 0\ \text{by complementary slackness}).
$$

Choose the constant so that $\psi(0)$ equals $-a\varepsilon$ (because $y_{0}=0$),

$$
c:=-a\,\varepsilon .
$$

Hence,
$$
\boxed{\;
\psi_{a,u,c}(y)=-a\|y\|_{2}^{2}+\langle u,y\rangle-a\varepsilon
\;}
$$

with

$$
a=\max\!\Bigl(0,\; \frac{\langle g,r\rangle}{2\|r\|_{2}^{2}}\Bigr),
\qquad
u=g-2a\,r 
$$

<span style="color:red"><b>TODO : Ne pas considerer $z_{pred}=X \theta_{pred}$ mais plutot $z_{pred}$ comme la projection de y sur la boule $||z-X\theta|| \leq \sqrt{\varepsilon}$ </b></span>

In [64]:
z_hat = X*θ_hat
g = (z_hat .- y) ./ norm(y .- z_hat)

delta = 1e-6
d = delta .* g ./ norm(g)
r = d
a = (g' * r) / (2.0 * norm(r)^2)
u = g - 2.0 * a * r
c = -a * delta^2
println("a = ", a, " | u = ", u, " | c = ", c)

a = 500000.0 | u = [-6.938893903907228e-18, 2.7755575615628914e-17, 3.469446951953614e-18, 0.0, 0.0, 0.0, 0.0, 0.0, -3.469446951953614e-18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.938893903907228e-18, 0.0, 0.0, 2.7755575615628914e-17, 0.0, 0.0, 0.0, 0.0, 4.336808689942018e-19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.3877787807814457e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.3877787807814457e-17, 0.0, 6.938893903907228e-18, 0.0, 1.734723475976807e-18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -3.469446951953614e-18, 0.0, 0.0, 0.0, 0.0, -6.938893903907228e-18, 0.0, 0.0, 0.0, 0.0, 0.0, 6.938893903907228e-18, 0.0, 2.7755575615628914e-17, 1.3877787807814457e-17, 0.0] | c = -5.0e-7


In [61]:
u .= round.(u, digits=15)
sum(abs.(u))

0.0

## Dual value

Let's compute for this $\varphi \in \Phi$ the dual value.

$$
\inf_{x=(\theta, z) \in \mathbb{R}^(p+n)} \bigl\{ F(x) + \varphi(0) - s(||X\theta-z||_2^2-\varepsilon) \bigr\} \quad \text{with}\quad s:r \mapsto 
\begin{cases}
c+\dfrac{\|u\|^{2}}{4a}&\text{if }a>0\text{ and }\displaystyle\frac{u}{2a}\in K(r),\\[6pt]
-a\,\|r\mathbf 1\|^{2}+\langle u,r\mathbf 1\rangle + c&\text{otherwise if }a>0,\\[6pt]
+\infty&\text{if }a=0\text{ and }\exists i:u_{i}>0,\\[6pt]
\langle u,r\mathbf 1\rangle + c&\text{if }a=0\text{ and }u\le 0\text{ component-wise}.
\end{cases}
$$



In [66]:
function s_auc(r,a,u,c)
    if a>0 && u/2a>=r
        return c+norm(u)^2 / (4.0 * a)
    end
    if a>0 
        return -a * norm(r)^2 + dot(u, r) + c
    end
    if a==0 && sum(u>0)==0
        return c + dot(u,r) 
    end
    
    error("a must be positive or component of u must be negative")
end

function d_s_auc(r,a,u,c)
    if a>0 && u/2a>=r
        return 0
    end
    if a>0 
        return -2ar +u
    end
    if a==0 && sum(u>0)==0
        return u
    end
    
    error("a must be positive or component of u must be negative")
end

mu_auc = -0.5*d_s_auc(r,a,u,c)


-0.0

In [None]:
function z_star(θ)
    tau = 1/
    prox_f = y + ()
    return 
end

<span style="color:red"><b>TODO : Finir ! </b></span>

<span style="color:red"><b>Les valeurs de a sont aberrantes, le fait que l'on vérifie $X\theta=z$ explose toutes les valeurs et difficile de savoir si c'est normal, cohérent, \dots Je préfère m'arrêter là et rependre plus tard</b></span>



#

# $\ell_1$

Although $\ell_1$ is convex and our problem verify Slater's condition and so verify with $\Phi_{Aff}$ strong duality, we will see if the framework with $\Phi_{lsc}$ allows to find again the zero-duality gap from strong duality. So let's implement same steps than precedent on $\ell_1$ penalty.


## $\lambda_{QUT}$

In [96]:
function lambda_qut_q1(X; B::Int=1000, α::Float64=0.05, seed::Int=42)
    n, p = size(X)
    H = I-ones(n,n)./n
    λs = [
        begin
            u = randn(n)
            Hu = H*u
            maximum(abs.(X'*Hu)) / norm(Hu,2)
        end
        for _ in 1:B
    ]
    return quantile(λs, 1-α)
end

lambda_qut_q1 (generic function with 1 method)

## Get the primal value

In [128]:
n, p = 70, 250
B_qut = 1000
sigma = 0.1
s = 0
;

We work without intercept for these examples so we don't unpenlized last coefficient of $\theta$

In [129]:
mu = 1. # mean of the noise
c = 0.0 # intercept
tol = 1e-12

X = randn(n, p)
X .-= mean(X; dims=1)
X ./= std(X;  dims=1)
# X̃ = hcat(X, ones(n)) # we have intercept (θ₂) so it corresponds to 1

y = sigma*mu* randn(n) .+ c
λ = lambda_qut_q1(X; α=0.05, B=B_qut)
       

f(θ) = norm(y - X*θ,2)
∇f(θ) =X'*(X*θ - y) ./ norm(X*θ - y,2)
penalty(θ) = λ * sum(abs.(θ[1:p]))
function prox(u, step)
    v = similar(u)
    for j in 1:p
        v[j] = sign(u[j]) * max(abs(u[j]) - step * λ, 0.0)
    end
    # v[end] = u[end] # ! I have no intercept
    return v
end

L0 = 0.001

θ_hat = ista(zeros(p), f, penalty, ∇f, L0, prox; max_iter=10_000, tol=tol, verbose=false)
;

## Associated dual point

From there we have 2 choices. We predict from the primal $\hat{\theta}$ wheras our problem, with "tautological" constraint has another variable to optimize : $z$. Hence, we can 

+ 1. From the constraint take $\hat{z} = X\hat{\theta}$
+ 2. From the relaxed constraint take $\hat{z} = P_{C(\hat{\theta})}(y)=\arg\min_{z\;:\;\|X\hat{\theta}-z\|_2\le\sqrt{\varepsilon}}\|z-y\|_2$  
That's to say the projection of $y$ on the closed ball $C : \theta \mapsto \{ z | \|X\theta-z\|_2\le\sqrt{\varepsilon}  \}$ ; This allows to get a valid $z$ closed to target value. Moreover, we have an explicit expression for the projection,

$$
P_{C(\theta)} : y \mapsto
\begin{cases}
y & \text{if }\|y-X\theta\|_2\le \sqrt{\varepsilon}\\[4pt]
X\theta+\sqrt{\varepsilon}\,\dfrac{y-X\theta}{\|y-X\theta\|_2} & \text{otherwise}
\end{cases}
$$

### Case 1 :  $\hat{z} := X\hat{\theta}$

By complementarity slackness, we get $||X\hat{\theta}-\hat{z}|| = 0 < \varepsilon$ so 
$$a=0,\qquad u=g \in \partial ||\hat{z}-y||_2 =
\begin{cases}
\dfrac{X\theta_{\text{pred}}-y}{\|X\theta_{\text{pred}}-y\|_2} & \text{if }X\theta_{\text{pred}}\ne y\\
\text{all }g\ \text{verifying }\|g\|_2\le 1 & \text{otherwise}
\end{cases} \qquad c=0
$$

$$ \psi_{0,g,0} (\cdot)=\langle g,\cdot\rangle$$

Apprently, the feasible set has to be <span style="color:red"><b>TODO : Proof </b></span>
$$
\|g\|_2\le 1\qquad \|X^\top g\|_\infty\le \lambda
$$

Hence, the dual function is given by 
$$
D : g \mapsto -\langle g,y\rangle-\sqrt{\varepsilon}\|g\|_2
$$

<span style="color:red"><b>TODO : Mieux rediger réponse + voir commentaire</b></span>
<!-- 
Très court, étape par étape (cas 1).

## Pourquoi apparaît $-\sqrt{\varepsilon}\,\|g\|_2$

Primal

$$
\min_{\theta,z}\ \|z-y\|_2+\lambda\|\theta\|_1\quad\text{s.c.}\quad \|X\theta-z\|_2^2\le \varepsilon
$$

1. **Support de la norme**

$$
\|z-y\|_2=\sup_{\|g\|_2\le 1}\ \langle g,z-y\rangle
$$

On introduit $g$ et on permute $\inf$/$\sup$.

2. **Lagrangien** avec $\mu\ge 0$

$$
L(\theta,z,\mu;g)=\langle g,z-y\rangle+\lambda\|\theta\|_1+\mu(\|X\theta-z\|_2^2-\varepsilon)
$$

3. **$\inf_z$** par complétion du carré
   Écrire $r=z-X\theta$. Alors

$$
\langle g,z-y\rangle+\mu\|X\theta-z\|_2^2
=\langle g,X\theta-y\rangle+\mu\|r\|_2^2-\langle g,r\rangle
$$

Minimisation en $r$ donne $r^\star=\tfrac{g}{2\mu}$ et

$$
\inf_z(\cdot)=\langle g,X\theta-y\rangle-\frac{\|g\|_2^2}{4\mu}
$$

4. **$\inf_\theta$** (conjugué de $\lambda\|\cdot\|_1$)

$$
\inf_\theta\ \lambda\|\theta\|_1+\langle X^\top g,\theta\rangle
=\begin{cases}
0 & \text{si }\|X^\top g\|_\infty\le \lambda\\
-\infty & \text{sinon}
\end{cases}
$$

5. **Dual partiel $(g,\mu)$**

$$
q(g,\mu)= -\langle g,y\rangle-\frac{\|g\|_2^2}{4\mu}-\mu\varepsilon
\quad\text{s.c.}\quad \|g\|_2\le 1,\ \|X^\top g\|_\infty\le \lambda,\ \mu\ge 0
$$

6. **Élimination de $\mu$**
   Pour $a=\|g\|_2^2/4,\ b=\varepsilon$,

$$
\sup_{\mu\ge 0}\Big(-\frac{a}{\mu}-b\mu\Big)=-2\sqrt{ab}=-\sqrt{\varepsilon}\,\|g\|_2
$$

(optimiseur $\mu^\star=\|g\|_2/(2\sqrt{\varepsilon})$ si $\|g\|_2>0$).

**Conclusion (duale serrée)**

$$
\boxed{\ \max_{\substack{\|g\|_2\le 1\\ \|X^\top g\|_\infty\le \lambda}}
\ \Big(-\langle g,y\rangle-\sqrt{\varepsilon}\,\|g\|_2\Big)\ }
$$

> Intuition équivalente (conjugué d’indicatrice)
> $\delta_{\{r:\|r\|\le \sqrt\varepsilon\}}^*(g)=\sup_{\|r\|\le \sqrt\varepsilon}\langle g,r\rangle=\sqrt\varepsilon\|g\|_2$.
> Dans le dual, elle apparaît avec un signe **moins**, d’où $-\sqrt{\varepsilon}\|g\|_2$. -->


### Case 2 : 

In [None]:
function project_onto_ball(y::AbstractVector, center::AbstractVector, radius::Real)
    d = norm(y - center, 2)
    d <= radius ? y : center + radius * (y - center) / d
end

In [None]:

function dual_phi_lsc_certificate(X, y, theta, λ; ε = 0.0, use_split=false, atol=1e-10)
    n = size(X, 1)
    xθ = X * theta

    if use_split && ε > 0
        z_pred = project_onto_ball(y, xθ, sqrt(ε))
    else
        z_pred = xθ
    end

    r = xθ - z_pred

    if norm(z_pred - y) > 0
        g = (z_pred - y) / norm(z_pred - y)
    else
        # subgradient of ||·|| at 0 is unit ball; pick 0 for robustness
        g = zeros(n)
    end

    # a,u,c in Φ_lsc
    if norm(r)^2 > 0
        a = max(0.0, dot(g, r) / (2 * norm(r)^2))
    else
        a = 0.0
    end
    u = g - 2a * r
    c = -a * ε

    # ℓ1 feasibility checks
    linf = maximum(abs.(X' * g))
    l1_feasible = linf <= λ + 1e-10
    supp = findall(!iszero, theta)
    sign_ok = all(abs(X[:, i]' * g + λ * sign(theta[i])) ≤ atol for i in supp)

    return (a=a, u=u, c=c, g=g, r=r, z_pred=z_pred,
            l1_feasible=l1_feasible, sign_ok=sign_ok, linf=linf)
end

# --- Optional: minimal correction to enforce ||X' g||_∞ ≤ λ by tuning a -----
function minimal_a_for_linf(X, g, r, λ)
    nr = norm(r)
    if nr == 0
        return 0.0
    end
    num = max.(abs.(X' * g) .- λ, 0.0)
    den = 2 .* abs.(X' * r)
    mask = den .> 0
    any(mask) ? maximum(num[mask] ./ den[mask]) : 0.0
end

function dual_with_correction(X, y, theta, λ; ε=0.0, use_split=false)
    cert = dual_phi_lsc_certificate(X, y, theta, λ; ε=ε, use_split=use_split)
    if !cert.l1_feasible
        a_hat = minimal_a_for_linf(X, cert.g, cert.r, λ)
        a = a_hat
        u = cert.g - 2a * cert.r
        c = -a * ε
        linf = maximum(abs.(X' * (cert.g - 2a * cert.r)))
        cert = merge(cert, (; a=a, u=u, c=c, l1_feasible=linf <= λ + 1e-10, linf=linf))
    end
    return cert
end


dual_with_correction (generic function with 1 method)

In [131]:
# you already have: X, y, λ, theta_hat
# Route A (no split): z_pred = X*θ_hat, a=0, u=g, c=0
certA = dual_phi_lsc_certificate(X, y, θ_hat, λ; ε=0.0, use_split=false)

# Route B (split): pick ε > 0 to get nonzero r (often nicer numerically)
ε = 1e-8
certB = dual_phi_lsc_certificate(X, y, θ_hat, λ; ε=ε, use_split=true)

# If needed, enforce dual feasibility by shrinking a along r
certB_fixed = dual_with_correction(X, y, θ_hat, λ; ε=ε, use_split=true)

@show certA.l1_feasible certA.sign_ok certA.linf
@show certB.l1_feasible certB.sign_ok certB.linf
@show certB_fixed.l1_feasible certB_fixed.sign_ok certB_fixed.linf


certA.l1_feasible = true
certA.sign_ok = true
certA.linf = 3.0734615860819594
certB.l1_feasible = true
certB.sign_ok = true
certB.linf = 3.0734615860819585
certB_fixed.l1_feasible = true
certB_fixed.sign_ok = true
certB_fixed.linf = 3.0734615860819585


3.0734615860819585

## Dual value

In [142]:
# ============================================================
# Dual value from a Φ_lsc certificate (a, u, c, r, g, z_pred)
# ============================================================

"""
Return dual value(s) and gap(s) using the Φ_lsc certificate.

Inputs:
  X, y, θ, λ : problem data and current iterate
  cert       : NamedTuple with fields a,u,c,g,r,z_pred (from dual_phi_lsc_certificate)

Outputs (NamedTuple):
  dual_no_split     = -dot(g, y)
  gap_no_split      = primal - dual_no_split
  dual_with_split   = -dot(g, y) + c        # if you used ε>0 and c=-aε
  gap_with_split    = primal - dual_with_split
  feasible_g        = feasibility flags for g (||g||_2≤1 and ||X'g||_∞≤λ)
  feasible_g_scaled = feasibility flags for g_scaled (after scaling)
  t_scale           = scaling factor used on g
  linf_g, norm_g    = diagnostics
"""
function dual_value_from_phi_cert(X, y, θ, λ, cert; tol=1e-10)
    # Rebuild g from (a,u,r). Should match cert.g up to numerical noise.
    g_cert = cert.u + 2*cert.a * cert.r

    # primal value at current θ
    r = X*θ - y
    primal = max(norm(r) - sqrt(ε), 0.0) + λ*sum(abs.(θ))

    # raw dual values (Route A and Route B)
    dual_no_split   = - dot(g_cert, y) - sqrt(ε) * norm(g_cert)
    dual_with_split = dual_no_split + cert.c   # when you used ε>0 and set c = -aε

    gap_no_split    = primal - dual_no_split
    gap_with_split  = primal - dual_with_split

    # feasibility of g w.r.t. classical dual constraints
    linf_g  = maximum(abs.(X' * g_cert))
    norm_g  = norm(g_cert)
    feasible_g = (norm_g <= 1 + tol) && (linf_g <= λ + tol)

    # Optional: scale g to enforce feasibility (g_scaled = t*g)
    t = 1.0
    if linf_g > λ + tol || norm_g > 1 + tol
        t = min(1.0,
                λ / max(linf_g, eps()),
                1.0 / max(norm_g, eps()))
    end
    g_scaled = t * g_cert
    linf_g_scaled = maximum(abs.(X' * g_scaled))
    norm_g_scaled = norm(g_scaled)
    feasible_g_scaled = (norm_g_scaled <= 1 + tol) && (linf_g_scaled <= λ + tol)

    dual_no_split_scaled   = - dot(g_scaled, y)- sqrt(ε) * norm(g_scaled)
    dual_with_split_scaled = dual_no_split_scaled + cert.c
    gap_no_split_scaled    = primal - dual_no_split_scaled
    gap_with_split_scaled  = primal - dual_with_split_scaled

    return (
        # raw (using g from (a,u,r))
        dual_no_split   = dual_no_split,
        gap_no_split    = gap_no_split,
        dual_with_split = dual_with_split,
        gap_with_split  = gap_with_split,
        feasible_g      = feasible_g,
        linf_g          = linf_g,
        norm_g          = norm_g,
        # scaled feasible variant (guaranteed lower bound)
        dual_no_split_scaled   = dual_no_split_scaled,
        gap_no_split_scaled    = gap_no_split_scaled,
        dual_with_split_scaled = dual_with_split_scaled,
        gap_with_split_scaled  = gap_with_split_scaled,
        feasible_g_scaled      = feasible_g_scaled,
        t_scale                = t,
        linf_g_scaled          = linf_g_scaled,
        norm_g_scaled          = norm_g_scaled
    )
end



dual_value_from_phi_cert

In [146]:
certA = dual_phi_lsc_certificate(X, y, θ_hat, λ; ε=0.0,  use_split=false)
infoA = dual_value_from_phi_cert(X, y, θ_hat, λ, certA)
@show infoA.dual_no_split infoA.gap_no_split infoA.feasible_g

# ε > 0 (split route)
ε = 1e-6
certB = dual_phi_lsc_certificate(X, y, θ_hat, λ; ε=ε, use_split=true)
infoB = dual_value_from_phi_cert(X, y, θ_hat, λ, certB)
@show infoB.dual_with_split infoB.gap_with_split infoB.feasible_g


infoA.dual_no_split = 0.9127061983600505
infoA.gap_no_split = 0.0
infoA.feasible_g = true
infoB.dual_with_split = 0.9112071983600506
infoB.gap_with_split = 0.0004999999999999449
infoB.feasible_g = true


true