diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 045f71e..27b7c21 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -18,7 +18,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.10'
+          - '1'
         os:
           - ubuntu-latest
         arch:
diff --git a/Project.toml b/Project.toml
index f085158..bc4008b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ authors = ["Klamkin", "Michael and contributors"]
 version = "1.0.0-DEV"
 
 [deps]
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Dualization = "191a621a-6537-11e9-281d-650236a99e60"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
@@ -14,6 +15,7 @@ MathOptSetDistances = "3b969827-a86c-476c-9527-bb6f1a8fbad5"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [compat]
+Adapt = "4.3.0"
 Dualization = "=0.7.0"
 MathOptSetDistances = "=0.2.11"
diff --git a/src/L2ODLL.jl b/src/L2ODLL.jl
index c3d4c32..93a2dd1 100644
--- a/src/L2ODLL.jl
+++ b/src/L2ODLL.jl
@@ -1,5 +1,6 @@
 module L2ODLL
 
+import Adapt
 import Dualization
 import JuMP
 import LinearAlgebra
@@ -17,6 +18,7 @@ const ADTypes = DI.ADTypes
 abstract type AbstractDecomposition end # must have p_ref and y_ref and implement can_decompose
 
+include("batch.jl")
 include("layers/generic.jl")
 include("layers/bounded.jl")
 include("layers/convex_qp.jl")
@@ -178,21 +180,27 @@ function y_shape(cache::DLLCache)
 end
 
 """
-    flatten_y(y::AbstractVector)
+    flatten_y(y)
 
 Flatten a vector of `y` variables into a single vector,
 i.e. Vector{Vector{Float64}} -> Vector{Float64}.
 """
-function flatten_y(y::AbstractVector)
+function flatten_y(y)
     return reduce(vcat, y)
 end
 
 """
-    unflatten_y(y::AbstractVector, y_shape::AbstractVector{Int})
+    unflatten_y(y::Vector{T}, y_shape::Vector{Int}) where T
 
 Unflatten a vector of flattened `y` variables into a vector of vectors,
 i.e. Vector{Float64} -> Vector{Vector{Float64}}.
 """
-function unflatten_y(y::AbstractVector, y_shape::AbstractVector{Int})
-    return [y[start_idx:start_idx + shape - 1] for (start_idx, shape) in enumerate(y_shape)]
+function unflatten_y(y::Vector{T}, y_shape::Vector{Int}) where T
+    result = Vector{T}[]
+    start_idx = 1
+    for shape in y_shape
+        push!(result, y[start_idx:start_idx + shape - 1])
+        start_idx += shape
+    end
+    return result
 end
 
 end # module
\ No newline at end of file
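For reference, the round trip these two helpers provide, as a minimal sketch (the literal values are invented for illustration):

    y = [[1.0, 2.0], [3.0, 4.0, 5.0]]               # per-block duals
    y_flat = L2ODLL.flatten_y(y)                    # [1.0, 2.0, 3.0, 4.0, 5.0]
    y_back = L2ODLL.unflatten_y(y_flat, length.(y))
    @assert y_back == y                             # lossless round trip

Note that the rewrite of `unflatten_y` also fixes a real bug: the old comprehension took `start_idx` from `enumerate(y_shape)`, so it advanced by 1 per block instead of by each block's length.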
""" -function unflatten_y(y::AbstractVector, y_shape::AbstractVector{Int}) - return [y[start_idx:start_idx + shape - 1] for (start_idx, shape) in enumerate(y_shape)] +function unflatten_y(y::Vector{T}, y_shape::Vector{Int}) where T + result = Vector{T}[] + start_idx = 1 + for shape in y_shape + push!(result, y[start_idx:start_idx + shape - 1]) + start_idx += shape + end + return result end end # module \ No newline at end of file diff --git a/src/batch.jl b/src/batch.jl new file mode 100644 index 0000000..89e6448 --- /dev/null +++ b/src/batch.jl @@ -0,0 +1,133 @@ +abstract type AbstractExprMatrix end + +struct AffExprMatrix{V,T} <: AbstractExprMatrix + c::V + c0::T +end +struct QuadExprMatrix{M,V,T} <: AbstractExprMatrix + Q::M + c::V + c0::T +end +struct VecAffExprMatrix{V,T} <: AbstractExprMatrix + A::V + b::T +end + + +Adapt.@adapt_structure AffExprMatrix +Adapt.@adapt_structure QuadExprMatrix +Adapt.@adapt_structure VecAffExprMatrix + +(aem::AffExprMatrix)(x) = aem.c'*x + aem.c0 +(qem::QuadExprMatrix)(x) = x'*qem.Q*x + qem.c'*x + qem.c0 +(vaem::VecAffExprMatrix)(x) = vaem.A*x + vaem.b + + +function AffExprMatrix( + aff::Vector{JuMP.GenericAffExpr{T,V}}, + v::Vector{V}; + backend = nothing +) where {T,V} + n = length(v) + + c = zeros(T, n) + + for (coeff, vr) in JuMP.linear_terms(aff) + c[vr_to_idx[vr]] = coeff + end + + c = _backend_vector(backend)(c) + return AffExprMatrix(c, c0) +end + + +function QuadExprMatrix( + qexpr::JuMP.GenericQuadExpr{T,V}, + v::Vector{V}; + backend = nothing +) where {T,V} + quad_terms = JuMP.quad_terms(qexpr) + nq = length(quad_terms) + n = length(v) + + vr_to_idx = _vr_to_idx(v) + + Qi = Int[] + sizehint!(Qi, nq) + Qj = Int[] + sizehint!(Qj, nq) + Qv = T[] + sizehint!(Qv, nq) + c = zeros(T, n) + c0 = qexpr.aff.constant + + for (coeff, vr1, vr2) in quad_terms + push!(Qi, vr_to_idx[vr1]) + push!(Qj, vr_to_idx[vr2]) + push!(Qv, coeff) + end + for (coeff, vr) in JuMP.linear_terms(qexpr) + c[vr_to_idx[vr]] = coeff + end + + Q = _backend_matrix(backend)(Qi, Qj, Qv, n, n) + c = _backend_vector(backend)(c) + return QuadExprMatrix(Q, c, c0) +end + + +function VecAffExprMatrix( + vaff::Vector{JuMP.GenericAffExpr{T,V}}, + v::Vector{V}; + backend = nothing +) where {T,V} + m = length(vaff) + n = length(v) + + linear_terms = [JuMP.linear_terms(jaff) for jaff in vaff] + nlinear = sum(length.(linear_terms)) + + vr_to_idx = _vr_to_idx(v) + + Ai = Int[] + sizehint!(Ai, nlinear) + Aj = Int[] + sizehint!(Aj, nlinear) + Av = T[] + sizehint!(Av, nlinear) + b = zeros(T, m) + + for (i, jaff) in enumerate(vaff) + for (coeff, vr) in linear_terms[i] + if vr ∈ v + push!(Ai, i) + push!(Aj, vr_to_idx[vr]) + push!(Av, coeff) + else + error("Variable $vr from function $i not found") + end + end + b[i] = jaff.constant + end + + A = _backend_matrix(backend)(Ai, Aj, Av, m, n) + b = _backend_vector(backend)(b) + return VecAffExprMatrix(A, b) +end + +function _vr_to_idx(v::V) where {V} + vr_to_idx = Dict{eltype(V), Int}() + for (i, vr) in enumerate(v) + vr_to_idx[vr] = i + end + return vr_to_idx +end + +function _backend_matrix(::Nothing) + return SparseArrays.sparse +end + +function _backend_vector(::Nothing) + return Vector +end diff --git a/src/layers/bounded.jl b/src/layers/bounded.jl index 60e1079..9c73b63 100644 --- a/src/layers/bounded.jl +++ b/src/layers/bounded.jl @@ -27,7 +27,9 @@ function can_decompose(model::JuMP.Model, ::Type{BoundDecomposition}) return false end -function bounded_builder(decomposition::BoundDecomposition, proj_fn, dual_model::JuMP.Model; completion=:exact, 
diff --git a/src/layers/bounded.jl b/src/layers/bounded.jl
index 60e1079..9c73b63 100644
--- a/src/layers/bounded.jl
+++ b/src/layers/bounded.jl
@@ -27,7 +27,9 @@ function can_decompose(model::JuMP.Model, ::Type{BoundDecomposition})
     return false
 end
 
-function bounded_builder(decomposition::BoundDecomposition, proj_fn, dual_model::JuMP.Model; completion=:exact, μ=1.0)
+function bounded_builder(decomposition::BoundDecomposition, proj_fn, dual_model::JuMP.Model;
+    completion=:exact, μ=1.0, backend=nothing
+    )
     p_vars = get_p(dual_model, decomposition)
     y_vars = get_y_dual(dual_model, decomposition)
     zl_vars = only.(get_zl(dual_model, decomposition))
@@ -76,22 +78,24 @@ function bounded_builder(decomposition::BoundDecomposition, proj_fn, dual_model:
         error("Invalid completion type: $completion. Must be :exact or :log.")
     end
 
+    z_fn = VecAffExprMatrix(
+        zl_plus_zu,
+        [flatten_y(y_vars); p_vars];
+        backend=backend
+    )
+    obj_fn = QuadExprMatrix(
+        obj_func,
+        [flatten_y(y_vars); p_vars; zl_vars; zu_vars];
+        backend=backend
+    )
     return (y_pred, param_value) -> begin
         y_pred_proj = proj_fn(y_pred)
-        zl_plus_zu_val = JuMP.value.(vr -> _find_and_return_value(vr,
-            [reduce(vcat, y_vars), p_vars],
-            [reduce(vcat, y_pred_proj), param_value]),
-            zl_plus_zu
-        )
+        zl_plus_zu_val = z_fn([flatten_y(y_pred_proj); param_value])
         zl, zu = complete_zlzu(completer, zl_plus_zu_val)
-        JuMP.value.(vr -> _find_and_return_value(vr,
-            [reduce(vcat, y_vars), p_vars, zl_vars, zu_vars],
-            [reduce(vcat, y_pred_proj), param_value, zl, zu]),
-            obj_func
-        )
+        obj_fn([flatten_y(y_pred_proj); param_value; zl; zu])
    end
 end
 
@@ -109,14 +113,15 @@ function complete_zlzu(::ExactBoundedCompletion, zl_plus_zu)
     return max.(zl_plus_zu, zero(eltype(zl_plus_zu))), -max.(-zl_plus_zu, zero(eltype(zl_plus_zu)))
 end
 
-struct LogBoundedCompletion{T<:Real} <: BoundedCompletion
+struct LogBoundedCompletion{V,T} <: BoundedCompletion
     μ::T
-    l::AbstractVector{T}
-    u::AbstractVector{T}
+    l::V
+    u::V
 end
-function complete_zlzu(c::LogBoundedCompletion, zl_plus_zu)
+Adapt.@adapt_structure LogBoundedCompletion
+function complete_zlzu(c::LogBoundedCompletion{V,T}, zl_plus_zu) where {V,T}
     v = c.μ ./ (c.u - c.l)
-    w = eltype(zl_plus_zu)(1//2) .* zl_plus_zu
+    w = T(1//2) .* zl_plus_zu
     sqrtv2w2 = hypot.(v, w)
     return (
         v + w + sqrtv2w2,
diff --git a/src/layers/convex_qp.jl b/src/layers/convex_qp.jl
index 0e9af92..9c22093 100644
--- a/src/layers/convex_qp.jl
+++ b/src/layers/convex_qp.jl
@@ -33,7 +33,9 @@ function can_decompose(model::JuMP.Model, ::Type{ConvexQP})
     return true
 end
 
-function convex_qp_builder(decomposition::ConvexQP, proj_fn, dual_model::JuMP.Model)
+function convex_qp_builder(decomposition::ConvexQP, proj_fn, dual_model::JuMP.Model;
+    backend=nothing
+    )
     p_vars = get_p(dual_model, decomposition)
     y_vars = get_y_dual(dual_model, decomposition)
     x_vars = get_x(decomposition)
@@ -73,25 +75,29 @@ function convex_qp_builder(decomposition::ConvexQP, proj_fn, dual_model::JuMP.Mo
     end
     @assert isempty(idx_left) "Some z were not found in the model"
 
-    obj_func = JuMP.objective_function(dual_model)
     Qinv = inv(Q)
+
+    Qz_fn = VecAffExprMatrix(
+        Qz,
+        [flatten_y(y_vars); p_vars];
+        backend=backend
+    )
+
+    obj_fn = QuadExprMatrix(
+        JuMP.objective_function(dual_model),
+        [flatten_y(y_vars); p_vars; z_vars];
+        backend=backend
+    )
+
     return (y_pred, param_value) -> begin
         y_pred_proj = proj_fn(y_pred)
-        Qz_val = JuMP.value.(vr -> _find_and_return_value(vr,
-            [reduce(vcat, y_vars), p_vars],
-            [reduce(vcat, y_pred_proj), param_value]),
-            Qz
-        )
+        Qz_val = Qz_fn([flatten_y(y_pred_proj); param_value])
         z = Qinv * Qz_val
-        JuMP.value.(vr -> _find_and_return_value(vr,
-            [reduce(vcat, y_vars), p_vars, z_vars],
-            [reduce(vcat, y_pred_proj), param_value, z]),
-            obj_func
-        )
+        obj_fn([flatten_y(y_pred_proj); param_value; z])
     end
 end
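A quick numeric check of the two completion rules in `bounded.jl` above (values invented). The exact rule splits `zl_plus_zu` into its positive and negative parts; the log rule, with `v = μ ./ (u .- l)` and `w = zl_plus_zu ./ 2`, keeps its first component `v .+ w .+ hypot.(v, w)` strictly positive and recovers `max.(z, 0)` as `μ → 0`:

    z = [-1.0, 0.0, 2.0]
    zl = max.(z, 0.0)      # [ 0.0, 0.0, 2.0]
    zu = -max.(-z, 0.0)    # [-1.0, 0.0, 0.0]
    @assert zl + zu == z   # exact completion preserves the sum

    μ, l, u = 1e-8, zeros(3), ones(3)
    v = μ ./ (u .- l)
    w = z ./ 2
    @assert all(v .+ w .+ hypot.(v, w) .> 0)   # log completion stays interior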
diff --git a/src/layers/generic.jl b/src/layers/generic.jl
index cf79e03..01bd819 100644
--- a/src/layers/generic.jl
+++ b/src/layers/generic.jl
@@ -37,7 +37,7 @@ function jump_builder(decomposition::AbstractDecomposition, proj_fn::Function, d
         lock(completion_model.ext[:🔒])
         try
             JuMP.set_parameter_value.(p_ref, param_value)
-            JuMP.set_parameter_value.(reduce(vcat, y_ref), reduce(vcat, proj_fn(y_pred)))
+            JuMP.set_parameter_value.(flatten_y(y_ref), flatten_y(proj_fn(y_pred)))
 
             JuMP.optimize!(completion_model)
             JuMP.assert_is_solved_and_feasible(completion_model)
@@ -63,7 +63,7 @@ function _make_completion_model(decomposition::AbstractDecomposition, dual_model
     # mark y as parameters (optimizing over z only)
     p_ref = getindex.(ref_map, get_p(dual_model, decomposition))
     y_ref = getindex.(ref_map, get_y_dual(dual_model, decomposition))
-    y_ref_flat = reduce(vcat, y_ref)
+    y_ref_flat = flatten_y(y_ref)
     JuMP.@constraint(completion_model, y_ref_flat .∈ MOI.Parameter.(zeros(length(y_ref_flat))))
 
     return completion_model, (p_ref, y_ref, ref_map)
diff --git a/src/projection.jl b/src/projection.jl
index c088bd9..f24ef99 100644
--- a/src/projection.jl
+++ b/src/projection.jl
@@ -31,4 +31,4 @@ function get_y_sets(dual_model, decomposition)
         isnothing(set) ? nothing : MOI.get(dual_model, MOI.ConstraintSet(), set)
         for set in get_y_constraint(dual_model, decomposition)
     ]
-end
\ No newline at end of file
+end
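The new tests below check that broadcasting the scalar entry points over a batch matches per-sample calls; in digest form (with `m` and `randn_like` as set up in the test file):

    ys = [randn_like(L2ODLL.get_y_dual(m)) for _ in 1:3]
    ps = [[0.1], [0.2], [0.3]]
    L2ODLL.dual_objective.(m, ys, ps)            # 3-element Vector, one value per sample
    L2ODLL.dual_objective_gradient.(m, ys, ps)   # matches the per-sample calls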
diff --git a/test/runtests.jl b/test/runtests.jl
index 990aacc..0fca1dd 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,5 +1,3 @@
-import JSON, HTTP
-
 using L2ODLL
 using Test
 
@@ -8,19 +6,11 @@ import Clarabel, HiGHS
 import DifferentiationInterface, ForwardDiff
 import LinearAlgebra
 
-function test_vectorize_bridge_matches_moi()
-    @test JSON.parse(String(HTTP.get(
-        "https://api.github.com/repos/jump-dev/MathOptInterface.jl/commits?path=src/Bridges/Constraint/bridges/VectorizeBridge.jl"
-    ).body))[1]["sha"] == "4e2630554afcde0b0b7c3e680e7fd3666e9e3825"
-end
-
-
 randn_like(vecofvecs) = [randn(length(v)) for v in vecofvecs];
 SOLVER = () -> ParametricOptInterface.Optimizer(HiGHS.Optimizer());
 
 @testset "L2ODLL.jl" begin
-    test_vectorize_bridge_matches_moi()
-    @testset "Markowitz Frontier" begin
+    @testset "Markowitz Frontier (ConvexQP)" begin
         μ = [11.5; 9.5; 6] / 100
         Σ = [
             166  34  58
@@ -59,9 +49,26 @@ SOLVER = () -> ParametricOptInterface.Optimizer(HiGHS.Optimizer());
         cqp_y_true = JuMP.dual.(L2ODLL.get_y(m))
         dobj1 = L2ODLL.dual_objective(m, cqp_y_true, param_value)
         @test isapprox(dobj1, JuMP.objective_value(m), atol=1e-6)
+
+        batch_size = 3
+        param_values = [[0.1], [0.2], [0.3]]
+        y_predicted_batch = [randn_like(L2ODLL.get_y_dual(m)) for _ in 1:batch_size]
+
+        dobj_batch = L2ODLL.dual_objective.(m, y_predicted_batch, param_values)
+        dobj_grad_batch = L2ODLL.dual_objective_gradient.(m, y_predicted_batch, param_values)
+
+        @test length(dobj_batch) == batch_size
+        @test length(dobj_grad_batch) == batch_size
+
+        for i in 1:batch_size
+            individual_dobj = L2ODLL.dual_objective(m, y_predicted_batch[i], param_values[i])
+            individual_dobj_grad = L2ODLL.dual_objective_gradient(m, y_predicted_batch[i], param_values[i])
+            @test dobj_batch[i] ≈ individual_dobj atol=1e-6
+            @test all(isapprox.(dobj_grad_batch[i], individual_dobj_grad, atol=1e-10))
+        end
     end
 
-    @testset "Markowitz SecondOrderCone" begin
+    @testset "Markowitz SecondOrderCone (BoundDecomposition)" begin
         Σ = [
             166  34  58
              34  64   4
@@ -108,5 +115,70 @@ SOLVER = () -> ParametricOptInterface.Optimizer(HiGHS.Optimizer());
         dobj1 = L2ODLL.dual_objective(m, blp_y_true, param_value)
         @test isapprox(dobj1, JuMP.objective_value(m), atol=1e-6)
+
+        batch_size = 3
+        param_values = [[randn(N); rand()] for _ in 1:batch_size]
+        y_predicted_batch = [randn_like(L2ODLL.get_y_dual(m)) for _ in 1:batch_size]
+
+        dobj_batch = L2ODLL.dual_objective.(m, y_predicted_batch, param_values)
+        dobj_grad_batch = L2ODLL.dual_objective_gradient.(m, y_predicted_batch, param_values)
+
+        @test length(dobj_batch) == batch_size
+        @test length(dobj_grad_batch) == batch_size
+
+        for i in 1:batch_size
+            individual_dobj = L2ODLL.dual_objective(m, y_predicted_batch[i], param_values[i])
+            individual_dobj_grad = L2ODLL.dual_objective_gradient(m, y_predicted_batch[i], param_values[i])
+            @test dobj_batch[i] ≈ individual_dobj atol=1e-6
+            @test all(isapprox.(dobj_grad_batch[i], individual_dobj_grad, atol=1e-10))
+        end
+    end
+
+    @testset "Utilities" begin
+        @testset "flatten_y" begin
+            y_vec = [[1.0, 2.0], [3.0, 4.0, 5.0], [6.0]]
+            y_flat = L2ODLL.flatten_y(y_vec)
+            @test y_flat == [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
+
+            y_shape = [2, 3, 1]
+            y_unflat = L2ODLL.unflatten_y(y_flat, y_shape)
+            @test y_unflat == y_vec
+
+            original = [[1.0, 2.0, 3.0], [4.0], [5.0, 6.0]]
+            flattened = L2ODLL.flatten_y(original)
+            shape = length.(original)
+            reconstructed = L2ODLL.unflatten_y(flattened, shape)
+            @test reconstructed == original
+
+            single_vec = [[1.0, 2.0, 3.0]]
+            single_flat = L2ODLL.flatten_y(single_vec)
+            @test single_flat == [1.0, 2.0, 3.0]
+            single_unflat = L2ODLL.unflatten_y(single_flat, [3])
+            @test single_unflat == single_vec
+
+            empty_vec = Vector{Float64}[]
+            @test_throws ArgumentError("reducing over an empty collection is not allowed; consider supplying `init` to the reducer") L2ODLL.flatten_y(empty_vec)
+
+            empty_flat = Float64[]
+            empty_unflat = L2ODLL.unflatten_y(empty_flat, Int[])
+            @test empty_unflat == Vector{Float64}[]
+
+            mixed_vec = [[1.0, 2.0], Float64[], [3.0]]
+            mixed_flat = L2ODLL.flatten_y(mixed_vec)
+            @test mixed_flat == [1.0, 2.0, 3.0]
+            mixed_shape = [2, 0, 1]
+            mixed_unflat = L2ODLL.unflatten_y(mixed_flat, mixed_shape)
+            @test mixed_unflat == mixed_vec
+
+            int_vec = [[1, 2], [3, 4, 5]]
+            int_flat = L2ODLL.flatten_y(int_vec)
+            @test int_flat == [1, 2, 3, 4, 5]
+            int_unflat = L2ODLL.unflatten_y(int_flat, [2, 3])
+            @test int_unflat == int_vec
+
+            mismatched_flat = [1.0, 2.0, 3.0]  # length 3
+            mismatched_shape = [2, 3]          # expects length 5
+            @test_throws BoundsError L2ODLL.unflatten_y(mismatched_flat, mismatched_shape)
+        end
     end
 
 end
\ No newline at end of file