Skip to content

Commit

Permalink
ported cvxquad: matrix log, geometric mean, quantum entropy (#418)
Browse files Browse the repository at this point in the history
* ported cvxquad: matrix log, geometric mean, quantum entropy

* comments about vexity

* throw DimensionMismatch for wrong size matrices

* throw DomainError instead of `error`

* added @test_throw

* refactored range assert

* added function overloads taking Integer, because that doesn't auto-convert to Rational

* more tests

* fix relative_entropy_epicone comment

* document relative_entropy_epicone

* made RelativeEntropyEpiCone a constraint rather than a function

* minor changes and more coverage

* fix diagm calls for old julia

* fix for entropy with real matrices

* RelativeEntropyEpiCone test

* trace_logm tests

* added cvxquad license

* fix
  • Loading branch information
dstahlke committed May 4, 2021
1 parent 267bac0 commit 9656d62
Show file tree
Hide file tree
Showing 13 changed files with 1,972 additions and 8 deletions.
28 changes: 28 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,31 @@ MIT "Expat" License:
> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
The files geom_mean_epicone.jl, geom_mean_hypocone.jl, lieb_ando.jl,
quantum_entropy.jl, quantum_relative_entropy.jl,
relative_entropy_epicone.jl, trace_logm.jl, and trace_mpower.jl in
src/atoms/sdp_cone are adapted from cvxquad
(https://github.com/hfawzi/cvxquad) which is licensed under the MIT
"Expat" license:

> Copyright (c) 2021 Hamza Fawzi
>
> Permission is hereby granted, free of charge, to any person obtaining
> a copy of this software and associated documentation files (the
> "Software"), to deal in the Software without restriction, including
> without limitation the rights to use, copy, modify, merge, publish,
> distribute, sublicense, and/or sell copies of the Software, and to
> permit persons to whom the Software is furnished to do so, subject to
> the following conditions:
>
> The above copyright notice and this permission notice shall be
> included in all copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 changes: 16 additions & 8 deletions docs/src/operations.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,22 @@ Semidefinite Program Representable Functions
An optimization problem using these functions can be solved by any SDP
solver (including SCS and Mosek).

| operation | description | vexity | slope | notes |
|------------------|-------------------------------|--------|---------------|-------|
| `nuclearnorm(x)` | sum of singular values of $x$ | convex | not monotonic | none |
| `opnorm(x, 2)` (`operatornorm(x)`) | max of singular values of $x$ | convex | not monotonic | none | |
| `eigmax(x)` | max eigenvalue of $x$ | convex | not monotonic | none |
| `eigmin(x)` | min eigenvalue of $x$ | concave | not monotonic | none |
| `matrixfrac(x, P)` | $x^TP^{-1}x$ | convex | not monotonic | IC: P is positive semidefinite |
| `sumlargesteigs(x, k)` | sum of top $k$ eigenvalues of $x$ | convex | not monotonic | IC: P symmetric |
| operation | description | vexity | slope | notes |
|------------------------------------------------|----------------------------------------------------------------|------------------------------------------------------------------------|---------------|------------------------------------------------------------------|
| `nuclearnorm(x)` | sum of singular values of $x$ | convex | not monotonic | none |
| `opnorm(x, 2)` (`operatornorm(x)`) | max of singular values of $x$ | convex | not monotonic | none |
| `eigmax(x)` | max eigenvalue of $x$ | convex | not monotonic | none |
| `eigmin(x)` | min eigenvalue of $x$ | concave | not monotonic | none |
| `matrixfrac(x, P)` | $x^TP^{-1}x$ | convex | not monotonic | IC: P is positive semidefinite |
| `sumlargesteigs(x, k)` | sum of top $k$ eigenvalues of $x$ | convex | not monotonic | IC: P symmetric |
| `T in GeomMeanHypoCone(A, B, t)` | $T \preceq A \#_t B = A^{1/2} (A^{-1/2} B A^{-1/2})^t A^{1/2}$ | concave | increasing | IC: $A \succeq 0$, $B \succeq 0$, $t \in [0,1]$ |
| `T in GeomMeanEpiCone(A, B, t)` | $T \succeq A \#_t B = A^{1/2} (A^{-1/2} B A^{-1/2})^t A^{1/2}$ | convex | not monotonic | IC: $A \succeq 0$, $B \succeq 0$, $t \in [-1, 0] \cup [1, 2]$ |
| `quantum_entropy(X)` | $-\textrm{Tr}(X \log X)$ | concave | not monotonic | IC: $X \succeq 0$; uses natural log |
| `quantum_relative_entropy(A, B)` | $\textrm{Tr}(A \log A - A \log B)$ | convex | not monotonic | IC: $A \succeq 0$, $B \succeq 0$; uses natural log |
| `trace_logm(X, C)` | $\textrm{Tr}(C \log X)$ | concave in X | not monotonic | IC: $X \succeq 0$, $C \succeq 0$, $C$ constant; uses natural log |
| `trace_mpower(A, t, C)` | $\textrm{Tr}(C A^t)$ | concave in A for $t \in [0,1]$, convex for $t \in [-1,0] \cup [1,2]$ | not monotonic | IC: $X \succeq 0$, $C \succeq 0$, $C$ constant, $t \in [-1, 2]$ |
| `lieb_ando(A, B, K, t)` | $\textrm{Tr}(K' A^{1-t} K B^t)$ | concave in A,B for $t \in [0,1]$, convex for $t \in [-1,0] \cup [1,2]$ | not monotonic | IC: $A \succeq 0$, $B \succeq 0$, $K$ constant, $t \in [-1, 2]$ |
| `T in RelativeEntropyEpiCone(X, Y, m, k, e)` | $T \succeq e' X^{1/2} \log(X^{1/2} Y^{-1} X^{1/2}) X^{1/2} e$ | convex | not monotonic | IC: $e$ constant; uses natural log |

Exponential + SDP representable Functions
-----------------------------------------
Expand Down
10 changes: 10 additions & 0 deletions src/Convex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ export conv, dotsort, entropy, exp, geomean, hinge_loss, huber, inner_product, i
export log_perspective, logisticloss, logsumexp, matrixfrac, neg, norm2, norm_1, norm_inf, nuclearnorm
export partialtrace, partialtranspose, pos, qol_elementwise, quadform, quadoverlin, rationalnorm
export relative_entropy, scaledgeomean, sigmamax, square, sumlargest, sumlargesteigs, sumsmallest, sumsquares
export GeomMeanHypoCone, GeomMeanEpiCone, RelativeEntropyEpiCone, quantum_relative_entropy, quantum_entropy
export trace_logm, trace_mpower, lieb_ando

# rexports from LinearAlgebra
export diag, diagm, Diagonal, dot, eigmax, eigmin, kron, logdet, norm, tr
Expand Down Expand Up @@ -152,6 +154,14 @@ include("atoms/sdp_cone/operatornorm.jl")
include("atoms/sdp_cone/eig_min_max.jl")
include("atoms/sdp_cone/matrixfrac.jl")
include("atoms/sdp_cone/sumlargesteigs.jl")
include("atoms/sdp_cone/geom_mean_hypocone.jl")
include("atoms/sdp_cone/geom_mean_epicone.jl")
include("atoms/sdp_cone/relative_entropy_epicone.jl")
include("atoms/sdp_cone/quantum_relative_entropy.jl")
include("atoms/sdp_cone/quantum_entropy.jl")
include("atoms/sdp_cone/trace_logm.jl")
include("atoms/sdp_cone/trace_mpower.jl")
include("atoms/sdp_cone/lieb_ando.jl")

### exponential atoms
include("atoms/exp_cone/exp.jl")
Expand Down
122 changes: 122 additions & 0 deletions src/atoms/sdp_cone/geom_mean_epicone.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#############################################################################
# geom_mean_epicone.jl
# Constrains T to
# A #_t B ⪯ T
# where:
# * A #_t B is the t-weighted geometric mean of A and B:
# A^{1/2} (A^{-1/2} B A^{-1/2})^t A^{1/2}
# Parameter t should be in [-1, 0] or [1, 2].
# * Constraints A ⪰ 0, B ⪰ 0 are added.
# All expressions and atoms are subtypes of AbstractExpr.
# Please read expressions.jl first.
#
# Note on fullhyp: GeomMeanEpiCone will always return a full epigraph cone
# (unlike GeomMeanHypoCone) and so this parameter is not really used. It is
# here just for consistency with the GeomMeanHypoCone function.
#
#REFERENCE
# Ported from CVXQUAD which is based on the paper: "Lieb's concavity
# theorem, matrix geometric means and semidefinite optimization" by Hamza
# Fawzi and James Saunderson (arXiv:1512.03401)
#############################################################################

struct GeomMeanEpiCone
A::AbstractExpr
B::AbstractExpr
t::Rational
size::Tuple{Int, Int}

function GeomMeanEpiCone(A::AbstractExpr, B::AbstractExpr, t::Rational, fullhyp::Bool=true)
if size(A) != size(B)
throw(DimensionMismatch("A and B must be the same size"))
end
n = size(A)[1]
if size(A) != (n, n)
throw(DimensionMismatch("A and B must be square"))
end
if t < -1 || (t > 0 && t < 1) || t > 2
throw(DomainError(t, "t must be in the range [-1, 0] or [1, 2]"))
end
return new(A, B, t, (n, n))
end

GeomMeanEpiCone(A::Value, B::AbstractExpr, t::Rational, fullhyp::Bool=true) = GeomMeanEpiCone(Constant(A), B, t, fullhyp)
GeomMeanEpiCone(A::AbstractExpr, B::Value, t::Rational, fullhyp::Bool=true) = GeomMeanEpiCone(A, Constant(B), t, fullhyp)
GeomMeanEpiCone(A::Value, B::Value, t::Rational, fullhyp::Bool=true) = GeomMeanEpiCone(Constant(A), Constant(B), t, fullhyp)

GeomMeanEpiCone(A::AbstractExprOrValue, B::AbstractExprOrValue, t::Integer, fullhyp::Bool=true) = GeomMeanEpiCone(A, B, t//1, fullhyp)
end

struct GeomMeanEpiConeConstraint <: Constraint
head::Symbol
id_hash::UInt64
T::AbstractExpr
cone::GeomMeanEpiCone

function GeomMeanEpiConeConstraint(T::AbstractExpr, cone::GeomMeanEpiCone)
if size(T) != cone.size
throw(DimensionMismatch("T must be size $(cone.size)"))
end
id_hash = hash((cone.A, cone.B, cone.t, :GeomMeanEpiCone))
return new(:GeomMeanEpiCone, id_hash, T, cone)
end

GeomMeanEpiConeConstraint(T::Value, cone::GeomMeanEpiCone) = GeomMeanEpiConeConstraint(Constant(T), cone)
end

in(T, cone::GeomMeanEpiCone) = GeomMeanEpiConeConstraint(T, cone)

function AbstractTrees.children(constraint::GeomMeanEpiConeConstraint)
return (constraint.T, constraint.cone.A, constraint.cone.B, "t=$(constraint.cone.t)")
end

# For t ∈ [-1, 0] ∪ [1, 2] the t-weighted matrix geometric mean is matrix convex (arxiv:1512.03401).
# So if A and B are convex sets, then T ⪰ A #_t B will be a convex set.
function vexity(constraint::GeomMeanEpiConeConstraint)
A = vexity(constraint.cone.A)
B = vexity(constraint.cone.B)
T = vexity(constraint.T)

# NOTE: can't say A == NotDcp() because the NotDcp constructor prints a warning message.
if typeof(A) == ConcaveVexity || typeof(A) == NotDcp
return NotDcp()
end
if typeof(B) == ConcaveVexity || typeof(B) == NotDcp
return NotDcp()
end
# Copied from vexity(c::GtConstraint)
vex = ConvexVexity() + (-T)
if vex == ConcaveVexity()
vex = NotDcp()
end
return vex
end

function conic_form!(constraint::GeomMeanEpiConeConstraint, unique_conic_forms::UniqueConicForms)
if !has_conic_form(unique_conic_forms, constraint)
A = constraint.cone.A
B = constraint.cone.B
t = constraint.cone.t
T = constraint.T
is_complex = sign(A) == ComplexSign() || sign(B) == ComplexSign() || sign(T) == ComplexSign()
if is_complex
make_temporary = () -> HermitianSemidefinite(size(A)[1])
else
make_temporary = () -> Semidefinite(size(A)[1])
end

Z = make_temporary()

if t <= 0
conic_form!([T A; A Z] 0, unique_conic_forms)
conic_form!(Z in GeomMeanHypoCone(A, B, -t, false), unique_conic_forms)
else
@assert t >= 1 # range of t checked in GeomMeanEpiCone constructor
conic_form!([T B; B Z] 0, unique_conic_forms)
conic_form!(Z in GeomMeanHypoCone(A, B, 2-t, false), unique_conic_forms)
end

cache_conic_form!(unique_conic_forms, constraint, Array{Convex.ConicConstr,1}())
end
return get_conic_form(unique_conic_forms, constraint)
end

0 comments on commit 9656d62

Please sign in to comment.