fix tests on GPU

MadNLP · Sep 13, 2021 · 095716d · 095716d
1 parent ee557cf
commit 095716d
Show file tree

Hide file tree

Showing 4 changed files with 41 additions and 17 deletions.
diff --git a/lib/MadNLPGPU/src/kernels.jl b/lib/MadNLPGPU/src/kernels.jl
@@ -53,7 +53,7 @@ end
 function MadNLP.treat_fixed_variable!(kkt::MadNLP.AbstractKKTSystem{T, MT}) where {T, MT<:CuMatrix{T}}
     length(kkt.ind_fixed) == 0 && return
     aug = kkt.aug_com
-    d_ind_fixed = kkt.ind_fixed |> CuVector # TODO: allocate ind_fixed directly on the GPU
+    d_ind_fixed = kkt.ind_fixed
     ndrange = (length(d_ind_fixed), size(aug, 1))
     ev = _treat_fixed_variable_kernell!(CUDADevice())(aug, d_ind_fixed, ndrange=ndrange)
     wait(ev)
@@ -62,7 +62,7 @@ end
 #=
     DenseKKTSystem kernels
 =#
-function MadNLP.mul!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
+function MadNLP.mul!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VI, VT, MT}, x::AbstractVector) where {T, VI<:CuVector{Int}, VT<:CuVector{T}, MT<:CuMatrix{T}}
     # Load buffers
     haskey(kkt.etc, :hess_w1) || (kkt.etc[:hess_w1] = CuVector{T}(undef, size(kkt.aug_com, 1)))
     haskey(kkt.etc, :hess_w2) || (kkt.etc[:hess_w2] = CuVector{T}(undef, size(kkt.aug_com, 1)))
@@ -76,7 +76,7 @@ function MadNLP.mul!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}, x
     copyto!(y, d_y)
 end
 
-function MadNLP.jtprod!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
+function MadNLP.jtprod!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VI, VT, MT}, x::AbstractVector) where {T, VI<:CuVector{Int}, VT<:CuVector{T}, MT<:CuMatrix{T}}
     # Load buffers
     haskey(kkt.etc, :jac_w1) || (kkt.etc[:jac_w1] = CuVector{T}(undef, size(kkt.jac, 1)))
     haskey(kkt.etc, :jac_w2) || (kkt.etc[:jac_w2] = CuVector{T}(undef, size(kkt.jac, 2)))
@@ -90,7 +90,7 @@ function MadNLP.jtprod!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}
     copyto!(y, d_y)
 end
 
-function MadNLP.set_aug_diagonal!(kkt::MadNLP.DenseKKTSystem{T, VT, MT}, ips::MadNLP.Solver) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
+function MadNLP.set_aug_diagonal!(kkt::MadNLP.DenseKKTSystem{T, VI, VT, MT}, ips::MadNLP.Solver) where {T, VI<:CuVector{Int}, VT<:CuVector{T}, MT<:CuMatrix{T}}
     haskey(kkt.etc, :pr_diag_host) || (kkt.etc[:pr_diag_host] = Vector{T}(undef, length(kkt.pr_diag)))
     pr_diag_h = kkt.etc[:pr_diag_host]::Vector{T}
     # Broadcast is not working as MadNLP array are allocated on the CPU,
@@ -134,3 +134,23 @@ function MadNLP._build_dense_kkt_system!(
     wait(ev)
 end
 
+@kernel function _compress_jacobian_kernel!(jac, ind_ineq, n)  # NOTE(review): `ind_ineq` is never read in this body; confirm the row should be `i` and not `ind_ineq[i]`
+    i = @index(Global, Linear)  # global linear index of this work-item within the launch ndrange
+    jac[i, i+n] = -1.0  # stores -1 at row i, column n+i; the literal is a Float64
+end
+
+function MadNLP.compress_jacobian!(kkt::MadNLP.DenseKKTSystem{T, VI, VT, MT}) where {T, VI<:CuVector{Int}, VT<:CuVector{T}, MT<:CuMatrix{T}}  # method selected when the index, vector and matrix type parameters are CUDA arrays
+    m = size(kkt.jac, 1)  # NOTE(review): `m` is not used anywhere below in this method
+    n = size(kkt.hess, 1)  # passed to the kernel as the column offset
+    ns = length(kkt.ind_ineq)  # number of entries in ind_ineq; used as the kernel ndrange
+    if ns != 0  # no kernel launch when ind_ineq is empty
+        # Add slack indexes: one work-item per entry of ind_ineq
+        ndrange = ns
+        ev = _compress_jacobian_kernel!(CUDADevice())(kkt.jac, kkt.ind_ineq, n, ndrange=ndrange)
+        wait(ev)  # wait on the returned event before the scaling step below
+    end
+    # Scale
+    kkt.jac .*= kkt.jacobian_scaling  # in-place broadcast multiply of jac by jacobian_scaling
+    return
+end
+
diff --git a/lib/MadNLPGPU/test/densekkt_gpu.jl b/lib/MadNLPGPU/test/densekkt_gpu.jl
@@ -20,7 +20,7 @@ function _compare_gpu_with_cpu(n, m, ind_fixed)
     ns = length(ind_cons.ind_ineq)
 
     # Init KKT on the GPU
-    kkt = MadNLP.DenseKKTSystem{Float64, CuVector{Float64}, CuMatrix{Float64}}(
+    kkt = MadNLP.DenseKKTSystem{Float64, CuVector{Int}, CuVector{Float64}, CuMatrix{Float64}}(
         nlp, ind_cons,
     )
     # Instantiate Solver with KKT on the GPU
@@ -34,7 +34,7 @@ function _compare_gpu_with_cpu(n, m, ind_fixed)
     @test h_ips.l ≈ d_ips.l atol=1e-10
 end
 
-@testset "MadNLP: dense versus sparse" begin
+@testset "MadNLPGPU: compare GPU implementation with CPU implementation" begin
     @testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)]
         _compare_gpu_with_cpu(n, m, Int[])
     end

diff --git a/src/interiorpointsolver.jl b/src/interiorpointsolver.jl
@@ -401,8 +401,9 @@ function Solver(nlp::AbstractNLPModel, kkt=nothing;
             SparseUnreducedKKTSystem{Float64, MT}(nlp, ind_cons)
         elseif opt.kkt_system == DENSE_KKT_SYSTEM
             MT = Matrix{Float64}
+            VI = Vector{Int}
             VT = Vector{Float64}
-            DenseKKTSystem{Float64, VT, MT}(nlp, ind_cons)
+            DenseKKTSystem{Float64, VI, VT, MT}(nlp, ind_cons)
         end
     end
 

diff --git a/src/kktsystem.jl b/src/kktsystem.jl
@@ -403,7 +403,7 @@ is_reduced(::SparseUnreducedKKTSystem) = false
     DenseKKTSystem
 =#
 
-struct DenseKKTSystem{T, VT, MT} <: AbstractKKTSystem{T, MT}
+struct DenseKKTSystem{T, VI, VT, MT} <: AbstractKKTSystem{T, MT}
     hess::MT
     jac::MT
     pr_diag::VT
@@ -412,14 +412,14 @@ struct DenseKKTSystem{T, VT, MT} <: AbstractKKTSystem{T, MT}
     # KKT system
     aug_com::MT
     # Info
-    ind_ineq::Vector{Int}
-    ind_fixed::Vector{Int}
+    ind_ineq::VI
+    ind_fixed::VI
     jacobian_scaling::VT
     # Buffers
     etc::Dict{Symbol, Any}
 end
 
-function DenseKKTSystem{T, VT, MT}(n, m, ind_ineq, ind_fixed) where {T, VT, MT}
+function DenseKKTSystem{T, VI, VT, MT}(n, m, ind_ineq, ind_fixed) where {T, VI, VT, MT}
     ns = length(ind_ineq)
     hess = MT(undef, n, n)
     jac = MT(undef, m, n+ns)
@@ -446,14 +446,17 @@ function DenseKKTSystem{T, VT, MT}(n, m, ind_ineq, ind_fixed) where {T, VT, MT}
     fill!(diag_hess, zero(T))
     fill!(jacobian_scaling, one(T))
 
-    return DenseKKTSystem{T, VT, MT}(
+    d_ind_ineq = ind_ineq |> VI
+    d_ind_fixed = ind_fixed |> VI
+
+    return DenseKKTSystem{T, VI, VT, MT}(
         hess, jac, pr_diag, du_diag, diag_hess, aug_com,
-        ind_ineq, ind_fixed, jacobian_scaling, Dict{Symbol, Any}(),
+        d_ind_ineq, d_ind_fixed, jacobian_scaling, Dict{Symbol, Any}(),
     )
 end
 
-function DenseKKTSystem{T, VT, MT}(nlp::AbstractNLPModel, info_constraints=get_index_constraints(nlp); options...) where {T, VT, MT}
-    return DenseKKTSystem{T, VT, MT}(
+function DenseKKTSystem{T, VI, VT, MT}(nlp::AbstractNLPModel, info_constraints=get_index_constraints(nlp); options...) where {T, VI, VT, MT}
+    return DenseKKTSystem{T, VI, VT, MT}(
         get_nvar(nlp), get_ncon(nlp), info_constraints.ind_ineq, info_constraints.ind_fixed; options...
     )
 end
@@ -503,7 +506,7 @@ function _build_dense_kkt_system!(dest, hess, jac, pr_diag, du_diag, diag_hess,
     end
 end
 
-function build_kkt!(kkt::DenseKKTSystem{T, VT, MT}) where {T, VT, MT}
+function build_kkt!(kkt::DenseKKTSystem{T, VI, VT, MT}) where {T, VI, VT, MT}
     n = size(kkt.hess, 1)
     m = size(kkt.jac, 1)
     ns = length(kkt.ind_ineq)
@@ -515,7 +518,7 @@ function build_kkt!(kkt::DenseKKTSystem{T, VT, MT}) where {T, VT, MT}
     treat_fixed_variable!(kkt)
 end
 
-function compress_jacobian!(kkt::DenseKKTSystem{T, VT, MT}) where {T, VT, MT}
+function compress_jacobian!(kkt::DenseKKTSystem{T, VI, VT, MT}) where {T, VI, VT, MT}
     m = size(kkt.jac, 1)
     n = size(kkt.hess, 1)
     # Add slack indexes