Skip to content

Commit

Permalink
fix tests on GPU
Browse files Browse the repository at this point in the history
  • Loading branch information
frapac committed Sep 13, 2021
1 parent ee557cf commit 095716d
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 17 deletions.
28 changes: 24 additions & 4 deletions lib/MadNLPGPU/src/kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ end
function MadNLP.treat_fixed_variable!(kkt::MadNLP.AbstractKKTSystem{T, MT}) where {T, MT<:CuMatrix{T}}
length(kkt.ind_fixed) == 0 && return
aug = kkt.aug_com
d_ind_fixed = kkt.ind_fixed |> CuVector # TODO: allocate ind_fixed directly on the GPU
d_ind_fixed = kkt.ind_fixed
ndrange = (length(d_ind_fixed), size(aug, 1))
ev = _treat_fixed_variable_kernell!(CUDADevice())(aug, d_ind_fixed, ndrange=ndrange)
wait(ev)
Expand All @@ -62,7 +62,7 @@ end
#=
DenseKKTSystem kernels
=#
function MadNLP.mul!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
function MadNLP.mul!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VI, VT, MT}, x::AbstractVector) where {T, VI<:CuVector{Int}, VT<:CuVector{T}, MT<:CuMatrix{T}}
# Load buffers
haskey(kkt.etc, :hess_w1) || (kkt.etc[:hess_w1] = CuVector{T}(undef, size(kkt.aug_com, 1)))
haskey(kkt.etc, :hess_w2) || (kkt.etc[:hess_w2] = CuVector{T}(undef, size(kkt.aug_com, 1)))
Expand All @@ -76,7 +76,7 @@ function MadNLP.mul!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}, x
copyto!(y, d_y)
end

function MadNLP.jtprod!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
function MadNLP.jtprod!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VI, VT, MT}, x::AbstractVector) where {T, VI<:CuVector{Int}, VT<:CuVector{T}, MT<:CuMatrix{T}}
# Load buffers
haskey(kkt.etc, :jac_w1) || (kkt.etc[:jac_w1] = CuVector{T}(undef, size(kkt.jac, 1)))
haskey(kkt.etc, :jac_w2) || (kkt.etc[:jac_w2] = CuVector{T}(undef, size(kkt.jac, 2)))
Expand All @@ -90,7 +90,7 @@ function MadNLP.jtprod!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}
copyto!(y, d_y)
end

function MadNLP.set_aug_diagonal!(kkt::MadNLP.DenseKKTSystem{T, VT, MT}, ips::MadNLP.Solver) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
function MadNLP.set_aug_diagonal!(kkt::MadNLP.DenseKKTSystem{T, VI, VT, MT}, ips::MadNLP.Solver) where {T, VI<:CuVector{Int}, VT<:CuVector{T}, MT<:CuMatrix{T}}
haskey(kkt.etc, :pr_diag_host) || (kkt.etc[:pr_diag_host] = Vector{T}(undef, length(kkt.pr_diag)))
pr_diag_h = kkt.etc[:pr_diag_host]::Vector{T}
# Broadcast is not working as MadNLP arrays are allocated on the CPU,
Expand Down Expand Up @@ -134,3 +134,23 @@ function MadNLP._build_dense_kkt_system!(
wait(ev)
end

# Kernel filling the slack-variable entries of the dense Jacobian `jac`.
#
# Arguments:
#   jac      -- dense Jacobian, modified in place.
#   ind_ineq -- indices of the inequality constraints; one work-item is launched
#               per entry. Presumably these are row indices into `jac`
#               (TODO confirm against the CPU `compress_jacobian!`).
#   n        -- column offset of the slack block (the caller passes
#               `size(kkt.hess, 1)`).
#
# The k-th work-item writes -1 at row `ind_ineq[k]`, column `n + k`.
# NOTE(review): the previous body wrote `jac[k, n + k]` and never read
# `ind_ineq`, which matches this version only when `ind_ineq == 1:ns`.
@kernel function _compress_jacobian_kernel!(jac, ind_ineq, n)
    k = @index(Global, Linear)
    # Use the element type of `jac` rather than a Float64 literal so the kernel
    # stays type-generic.
    jac[ind_ineq[k], n + k] = -one(eltype(jac))
end

# GPU method of `compress_jacobian!` for a `DenseKKTSystem` backed by CUDA arrays.
#
# When `kkt.ind_ineq` is non-empty, launches `_compress_jacobian_kernel!` with
# one work-item per entry of `kkt.ind_ineq`, passing `size(kkt.hess, 1)` as the
# column offset, and waits for the launch to complete. Afterwards `kkt.jac` is
# multiplied in place (broadcast) by `kkt.jacobian_scaling`. Returns `nothing`.
function MadNLP.compress_jacobian!(kkt::MadNLP.DenseKKTSystem{T, VI, VT, MT}) where {T, VI<:CuVector{Int}, VT<:CuVector{T}, MT<:CuMatrix{T}}
    nslack = length(kkt.ind_ineq)
    if nslack != 0
        # Slack entries: skipped entirely when there is no inequality constraint,
        # so no kernel is launched with an empty ndrange.
        nvar = size(kkt.hess, 1)
        kernel! = _compress_jacobian_kernel!(CUDADevice())
        event = kernel!(kkt.jac, kkt.ind_ineq, nvar, ndrange=nslack)
        wait(event)
    end
    # Apply the Jacobian scaling in place.
    kkt.jac .*= kkt.jacobian_scaling
    return
end

4 changes: 2 additions & 2 deletions lib/MadNLPGPU/test/densekkt_gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ function _compare_gpu_with_cpu(n, m, ind_fixed)
ns = length(ind_cons.ind_ineq)

# Init KKT on the GPU
kkt = MadNLP.DenseKKTSystem{Float64, CuVector{Float64}, CuMatrix{Float64}}(
kkt = MadNLP.DenseKKTSystem{Float64, CuVector{Int}, CuVector{Float64}, CuMatrix{Float64}}(
nlp, ind_cons,
)
# Instantiate Solver with KKT on the GPU
Expand All @@ -34,7 +34,7 @@ function _compare_gpu_with_cpu(n, m, ind_fixed)
@test h_ips.l d_ips.l atol=1e-10
end

@testset "MadNLP: dense versus sparse" begin
@testset "MadNLPGPU: compare GPU implementation with CPU implementation" begin
@testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)]
_compare_gpu_with_cpu(n, m, Int[])
end
Expand Down
3 changes: 2 additions & 1 deletion src/interiorpointsolver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,9 @@ function Solver(nlp::AbstractNLPModel, kkt=nothing;
SparseUnreducedKKTSystem{Float64, MT}(nlp, ind_cons)
elseif opt.kkt_system == DENSE_KKT_SYSTEM
MT = Matrix{Float64}
VI = Vector{Int}
VT = Vector{Float64}
DenseKKTSystem{Float64, VT, MT}(nlp, ind_cons)
DenseKKTSystem{Float64, VI, VT, MT}(nlp, ind_cons)
end
end

Expand Down
23 changes: 13 additions & 10 deletions src/kktsystem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ is_reduced(::SparseUnreducedKKTSystem) = false
DenseKKTSystem
=#

struct DenseKKTSystem{T, VT, MT} <: AbstractKKTSystem{T, MT}
struct DenseKKTSystem{T, VI, VT, MT} <: AbstractKKTSystem{T, MT}
hess::MT
jac::MT
pr_diag::VT
Expand All @@ -412,14 +412,14 @@ struct DenseKKTSystem{T, VT, MT} <: AbstractKKTSystem{T, MT}
# KKT system
aug_com::MT
# Info
ind_ineq::Vector{Int}
ind_fixed::Vector{Int}
ind_ineq::VI
ind_fixed::VI
jacobian_scaling::VT
# Buffers
etc::Dict{Symbol, Any}
end

function DenseKKTSystem{T, VT, MT}(n, m, ind_ineq, ind_fixed) where {T, VT, MT}
function DenseKKTSystem{T, VI, VT, MT}(n, m, ind_ineq, ind_fixed) where {T, VI, VT, MT}
ns = length(ind_ineq)
hess = MT(undef, n, n)
jac = MT(undef, m, n+ns)
Expand All @@ -446,14 +446,17 @@ function DenseKKTSystem{T, VT, MT}(n, m, ind_ineq, ind_fixed) where {T, VT, MT}
fill!(diag_hess, zero(T))
fill!(jacobian_scaling, one(T))

return DenseKKTSystem{T, VT, MT}(
d_ind_ineq = ind_ineq |> VI
d_ind_fixed = ind_fixed |> VI

return DenseKKTSystem{T, VI, VT, MT}(
hess, jac, pr_diag, du_diag, diag_hess, aug_com,
ind_ineq, ind_fixed, jacobian_scaling, Dict{Symbol, Any}(),
d_ind_ineq, d_ind_fixed, jacobian_scaling, Dict{Symbol, Any}(),
)
end

function DenseKKTSystem{T, VT, MT}(nlp::AbstractNLPModel, info_constraints=get_index_constraints(nlp); options...) where {T, VT, MT}
return DenseKKTSystem{T, VT, MT}(
# Convenience constructor: build a `DenseKKTSystem` from an NLP model.
#
# Reads the number of variables and constraints from `nlp` (`get_nvar`,
# `get_ncon`) and the `ind_ineq` / `ind_fixed` index sets from
# `info_constraints` (defaults to `get_index_constraints(nlp)`), then forwards
# them, together with any keyword `options`, to the dimension-based constructor.
function DenseKKTSystem{T, VI, VT, MT}(nlp::AbstractNLPModel, info_constraints=get_index_constraints(nlp); options...) where {T, VI, VT, MT}
    nvar = get_nvar(nlp)
    ncon = get_ncon(nlp)
    ineq_indices = info_constraints.ind_ineq
    fixed_indices = info_constraints.ind_fixed
    return DenseKKTSystem{T, VI, VT, MT}(nvar, ncon, ineq_indices, fixed_indices; options...)
end
Expand Down Expand Up @@ -503,7 +506,7 @@ function _build_dense_kkt_system!(dest, hess, jac, pr_diag, du_diag, diag_hess,
end
end

function build_kkt!(kkt::DenseKKTSystem{T, VT, MT}) where {T, VT, MT}
function build_kkt!(kkt::DenseKKTSystem{T, VI, VT, MT}) where {T, VI, VT, MT}
n = size(kkt.hess, 1)
m = size(kkt.jac, 1)
ns = length(kkt.ind_ineq)
Expand All @@ -515,7 +518,7 @@ function build_kkt!(kkt::DenseKKTSystem{T, VT, MT}) where {T, VT, MT}
treat_fixed_variable!(kkt)
end

function compress_jacobian!(kkt::DenseKKTSystem{T, VT, MT}) where {T, VT, MT}
function compress_jacobian!(kkt::DenseKKTSystem{T, VI, VT, MT}) where {T, VI, VT, MT}
m = size(kkt.jac, 1)
n = size(kkt.hess, 1)
# Add slack indexes
Expand Down

0 comments on commit 095716d

Please sign in to comment.