Skip to content

Commit

Permalink
Clean up methods for computing matrix that corresponds to `linear_ope…
Browse files Browse the repository at this point in the history
…ration!` used by MultigridSolver (#2885)

* make bcs known to initialize_matrix methods

* fix mistake

* initialize_matrix -> compute_matrix_for_linear_operation + docs

* one less scalar operation

* update method name in comment

* fix boundary_conditions kwarg

* add grid alias

* nuke make_column; use vec
  • Loading branch information
navidcy committed Mar 28, 2023
1 parent f4bbce1 commit bcc34f0
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ function MGImplicitFreeSurfaceSolver(grid::AbstractGrid,
settings[:reltol] = get(settings, :reltol, min(1e-7, sqrt(eps(eltype(grid)))))

# initialize solver with Δt = nothing so that linear matrix is not computed;
# see `initialize_matrix` methods
# see `compute_matrix_for_linear_operation` methods
solver = MultigridSolver(matrix;
template_field = right_hand_side,
settings...)
Expand Down
1 change: 1 addition & 0 deletions src/Solvers/Solvers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export
using Statistics
using FFTW
using CUDA
using SparseArrays
using KernelAbstractions
using KernelAbstractions.Extras.LoopInfo: @unroll

Expand Down
113 changes: 112 additions & 1 deletion src/Solvers/matrix_solver_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ end
@inline function validate_laplacian_direction(N, topo, reduced_dim)
dim = N > 1 && reduced_dim == false
if N < 3 && topo == Bounded && dim == true
throw(ArgumentError("Cannot calculate laplacian in bounded domain with N < 3!"))
throw(ArgumentError("Cannot calculate Laplacian in bounded domain with N < 3!"))
end

return dim
Expand All @@ -95,3 +95,114 @@ end
@inline validate_laplacian_size(N, dim) = dim == true ? N : 1

@inline ensure_diagonal_elements_are_present!(A) = fkeep!(A, (i, j, x) -> (i == j || !iszero(x)))

"""
compute_matrix_for_linear_operation(arch, template_field, linear_operation!, args...;
boundary_conditions_input=nothing,
boundary_conditions_output=nothing)
Return the sparse matrix that corresponds to the `linear_operation!`. The `linear_operation!`
is expected to have the argument structure:
```julia
linear_operation!(output, input, args...)
```
Keyword arguments `boundary_conditions_input` and `boundary_conditions_output` determine the
boundary conditions that the `input` and `output` fields are expected to have. If `nothing`
is provided, then the `input` and `output` fields inherit the default boundary conditions
according to the `template_field`.
For `architecture = CPU()` the matrix returned is a `SparseArrays.SparseMatrixCSC`; for `GPU()`
is a `CUDA.CuSparseMatrixCSC`.
"""
function compute_matrix_for_linear_operation(::CPU, template_field, linear_operation!, args...;
boundary_conditions_input=nothing,
boundary_conditions_output=nothing)

loc = location(template_field)
Nx, Ny, Nz = size(template_field)
grid = template_field.grid

# allocate matrix A
A = spzeros(eltype(grid), Nx*Ny*Nz, Nx*Ny*Nz)

if boundary_conditions_input === nothing
boundary_conditions_input = FieldBoundaryConditions(grid, loc, template_field.indices)
end

if boundary_conditions_output === nothing
boundary_conditions_output = FieldBoundaryConditions(grid, loc, template_field.indices)
end

# allocate fields eᵢⱼₖ and Aeᵢⱼₖ = A*eᵢⱼₖ
eᵢⱼₖ = Field(loc, grid; boundary_conditions=boundary_conditions_input)
Aeᵢⱼₖ = Field(loc, grid; boundary_conditions=boundary_conditions_output)

for k in 1:Nz, j in 1:Ny, i in 1:Nx
parent(eᵢⱼₖ) .= 0
parent(Aeᵢⱼₖ) .= 0

eᵢⱼₖ[i, j, k] = 1

fill_halo_regions!(eᵢⱼₖ)

linear_operation!(Aeᵢⱼₖ, eᵢⱼₖ, args...)

A[:, Ny*Nx*(k-1) + Nx*(j-1) + i] .= vec(Aeᵢⱼₖ)
end

return A
end

function compute_matrix_for_linear_operation(::GPU, template_field, linear_operation!, args...;
boundary_conditions_input=nothing,
boundary_conditions_output=nothing)

loc = location(template_field)
Nx, Ny, Nz = size(template_field)
grid = template_field.grid

if boundary_conditions_input === nothing
boundary_conditions_input = FieldBoundaryConditions(grid, loc, template_field.indices)
end

if boundary_conditions_output === nothing
boundary_conditions_output = FieldBoundaryConditions(grid, loc, template_field.indices)
end

# allocate fields eᵢⱼₖ and Aeᵢⱼₖ = A*eᵢⱼₖ; A is the matrix to be computed
eᵢⱼₖ = Field(loc, grid; boundary_conditions=boundary_conditions_input)
Aeᵢⱼₖ = Field(loc, grid; boundary_conditions=boundary_conditions_output)

colptr = CuArray{Int}(undef, Nx*Ny*Nz + 1)
rowval = CuArray{Int}(undef, 0)
nzval = CuArray{eltype(grid)}(undef, 0)

CUDA.@allowscalar colptr[1] = 1

for k in 1:Nz, j in 1:Ny, i in 1:Nx
parent(eᵢⱼₖ) .= 0
parent(Aeᵢⱼₖ) .= 0

CUDA.@allowscalar eᵢⱼₖ[i, j, k] = 1

fill_halo_regions!(eᵢⱼₖ)

linear_operation!(Aeᵢⱼₖ, eᵢⱼₖ, args...)

count = 0
for n in 1:Nz, m in 1:Ny, l in 1:Nx
Aeᵢⱼₖₗₘₙ = CUDA.@allowscalar Aeᵢⱼₖ[l, m, n]
if Aeᵢⱼₖₗₘₙ != 0
append!(rowval, Ny*Nx*(n-1) + Nx*(m-1) + l)
append!(nzval, Aeᵢⱼₖₗₘₙ)
count += 1
end
end

CUDA.@allowscalar colptr[Ny*Nx*(k-1) + Nx*(j-1) + i + 1] = colptr[Ny*Nx*(k-1) + Nx*(j-1) + i] + count
end

return CuSparseMatrixCSC(colptr, rowval, nzval, (Nx*Ny*Nz, Nx*Ny*Nz))
end
58 changes: 1 addition & 57 deletions src/Solvers/multigrid_solver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ function MultigridSolver(linear_operation!::Function,

(arch == GPU() && !hasamgx) && error("Multigrid on the GPU requires a linux operating system due to AMGX.jl")

matrix = initialize_matrix(arch, template_field, linear_operation!, args...)
matrix = compute_matrix_for_linear_operation(arch, template_field, linear_operation!, args...)

return MultigridSolver(matrix; template_field, maxiter, reltol, abstol, amg_algorithm)
end
Expand Down Expand Up @@ -222,62 +222,6 @@ end
@inline create_multilevel(::RugeStubenAMG, A) = ruge_stuben(A)
@inline create_multilevel(::SmoothedAggregationAMG, A) = smoothed_aggregation(A)

function initialize_matrix(::CPU, template_field, linear_operator!, args...)
Nx, Ny, Nz = size(template_field)
A = spzeros(eltype(template_field.grid), Nx*Ny*Nz, Nx*Ny*Nz)
make_column(f) = reshape(interior(f), Nx*Ny*Nz)

eᵢⱼₖ = similar(template_field)
∇²eᵢⱼₖ = similar(template_field)

for k = 1:Nz, j in 1:Ny, i in 1:Nx
parent(eᵢⱼₖ) .= 0
parent(∇²eᵢⱼₖ) .= 0
eᵢⱼₖ[i, j, k] = 1
fill_halo_regions!(eᵢⱼₖ)
linear_operator!(∇²eᵢⱼₖ, eᵢⱼₖ, args...)

A[:, Ny*Nx*(k-1) + Nx*(j-1) + i] .= make_column(∇²eᵢⱼₖ)
end

return A
end

function initialize_matrix(::GPU, template_field, linear_operator!, args...)
Nx, Ny, Nz = size(template_field)
FT = eltype(template_field.grid)

make_column(f) = reshape(interior(f), Nx*Ny*Nz)

eᵢⱼₖ = similar(template_field)
∇²eᵢⱼₖ = similar(template_field)

colptr = CuArray{Int}(undef, Nx*Ny*Nz+1)
rowval = CuArray{Int}(undef, 0)
nzval = CuArray{FT}(undef, 0)

CUDA.@allowscalar colptr[1] = 1

for k = 1:Nz, j in 1:Ny, i in 1:Nx
parent(eᵢⱼₖ) .= 0
parent(∇²eᵢⱼₖ) .= 0
CUDA.@allowscalar eᵢⱼₖ[i, j, k] = 1
fill_halo_regions!(eᵢⱼₖ)
linear_operator!(∇²eᵢⱼₖ, eᵢⱼₖ, args...)
count = 0
for n = 1:Nz, m in 1:Ny, l in 1:Nx
CUDA.@allowscalar if ∇²eᵢⱼₖ[l, m, n] != 0
append!(rowval, Ny*Nx*(n-1) + Nx*(m-1) + l)
CUDA.@allowscalar append!(nzval, ∇²eᵢⱼₖ[l, m, n])
count += 1
end
end
CUDA.@allowscalar colptr[Ny*Nx*(k-1) + Nx*(j-1) + i + 1] = colptr[Ny*Nx*(k-1) + Nx*(j-1) + i] + count
end

return CuSparseMatrixCSC(colptr, rowval, nzval, (Nx*Ny*Nz, Nx*Ny*Nz))
end

"""
solve!(x, solver::MultigridSolver, b; kwargs...)
Expand Down

0 comments on commit bcc34f0

Please sign in to comment.