Clean up methods for computing matrix that corresponds to `linear_ope…

…ration!` used by MultigridSolver (#2885) * make bcs known to initialize_matrix methods * fix mistake * initialize_matrix -> compute_matrix_for_linear_operation + docs * one less scalar operation * update method name in comment * fix boundary_conditions kwarg * add grid alias * nuke make_column; use vec
CliMA · Mar 28, 2023 · bcc34f0 · bcc34f0
1 parent f4bbce1
commit bcc34f0
Show file tree

Hide file tree

Showing 4 changed files with 115 additions and 59 deletions.
diff --git a/src/Models/HydrostaticFreeSurfaceModels/mg_implicit_free_surface_solver.jl b/src/Models/HydrostaticFreeSurfaceModels/mg_implicit_free_surface_solver.jl
@@ -96,7 +96,7 @@ function MGImplicitFreeSurfaceSolver(grid::AbstractGrid,
     settings[:reltol] = get(settings, :reltol, min(1e-7, sqrt(eps(eltype(grid)))))
 
     # initialize solver with Δt = nothing so that linear matrix is not computed;
-    # see `initialize_matrix` methods
+    # see `compute_matrix_for_linear_operation` methods
     solver = MultigridSolver(matrix;
                              template_field = right_hand_side,
                              settings...)

diff --git a/src/Solvers/Solvers.jl b/src/Solvers/Solvers.jl
@@ -11,6 +11,7 @@ export
 using Statistics
 using FFTW
 using CUDA
+using SparseArrays
 using KernelAbstractions
 using KernelAbstractions.Extras.LoopInfo: @unroll
 

diff --git a/src/Solvers/matrix_solver_utils.jl b/src/Solvers/matrix_solver_utils.jl
@@ -86,7 +86,7 @@ end
 @inline function validate_laplacian_direction(N, topo, reduced_dim)  
     dim = N > 1 && reduced_dim == false
     if N < 3 && topo == Bounded && dim == true
-        throw(ArgumentError("Cannot calculate laplacian in bounded domain with N < 3!"))
+        throw(ArgumentError("Cannot calculate Laplacian in bounded domain with N < 3!"))
     end
 
     return dim
@@ -95,3 +95,114 @@ end
 @inline validate_laplacian_size(N, dim) = dim == true ? N : 1
 
 @inline ensure_diagonal_elements_are_present!(A) = fkeep!(A, (i, j, x) -> (i == j || !iszero(x)))
+
+"""
+    compute_matrix_for_linear_operation(arch, template_field, linear_operation!, args...;
+                                        boundary_conditions_input=nothing,
+                                        boundary_conditions_output=nothing)
+
+Return the sparse matrix that corresponds to the `linear_operation!`. The `linear_operation!`
+is expected to have the argument structure:
+
+```julia
+linear_operation!(output, input, args...)
+```
+
+Keyword arguments `boundary_conditions_input` and `boundary_conditions_output` determine the
+boundary conditions that the `input` and `output` fields are expected to have. If `nothing`
+is provided, then the `input` and `output` fields inherit the default boundary conditions
+according to the `template_field`.
+
+For `architecture = CPU()` the matrix returned is a `SparseArrays.SparseMatrixCSC`; for `GPU()`
+is a `CUDA.CuSparseMatrixCSC`.
+"""
+function compute_matrix_for_linear_operation(::CPU, template_field, linear_operation!, args...;
+                                             boundary_conditions_input=nothing,
+                                             boundary_conditions_output=nothing)
+
+    loc = location(template_field)
+    Nx, Ny, Nz = size(template_field)
+    grid = template_field.grid
+
+    # allocate matrix A
+    A = spzeros(eltype(grid), Nx*Ny*Nz, Nx*Ny*Nz)
+
+    if boundary_conditions_input === nothing
+        boundary_conditions_input = FieldBoundaryConditions(grid, loc, template_field.indices)
+    end
+
+    if boundary_conditions_output === nothing
+        boundary_conditions_output = FieldBoundaryConditions(grid, loc, template_field.indices)
+    end
+
+    # allocate fields eᵢⱼₖ and Aeᵢⱼₖ = A*eᵢⱼₖ
+     eᵢⱼₖ = Field(loc, grid; boundary_conditions=boundary_conditions_input)
+    Aeᵢⱼₖ = Field(loc, grid; boundary_conditions=boundary_conditions_output)
+
+    for k in 1:Nz, j in 1:Ny, i in 1:Nx
+        parent(eᵢⱼₖ)  .= 0
+        parent(Aeᵢⱼₖ) .= 0
+
+        eᵢⱼₖ[i, j, k] = 1
+
+        fill_halo_regions!(eᵢⱼₖ)
+
+        linear_operation!(Aeᵢⱼₖ, eᵢⱼₖ, args...)
+
+        A[:, Ny*Nx*(k-1) + Nx*(j-1) + i] .= vec(Aeᵢⱼₖ)
+    end
+
+    return A
+end
+
+function compute_matrix_for_linear_operation(::GPU, template_field, linear_operation!, args...;
+                                             boundary_conditions_input=nothing,
+                                             boundary_conditions_output=nothing)
+
+    loc = location(template_field)
+    Nx, Ny, Nz = size(template_field)
+    grid = template_field.grid
+
+    if boundary_conditions_input === nothing
+        boundary_conditions_input = FieldBoundaryConditions(grid, loc, template_field.indices)
+    end
+
+    if boundary_conditions_output === nothing
+        boundary_conditions_output = FieldBoundaryConditions(grid, loc, template_field.indices)
+    end
+
+    # allocate fields eᵢⱼₖ and Aeᵢⱼₖ = A*eᵢⱼₖ; A is the matrix to be computed
+     eᵢⱼₖ = Field(loc, grid; boundary_conditions=boundary_conditions_input)
+    Aeᵢⱼₖ = Field(loc, grid; boundary_conditions=boundary_conditions_output)
+
+    colptr = CuArray{Int}(undef, Nx*Ny*Nz + 1)
+    rowval = CuArray{Int}(undef, 0)
+    nzval  = CuArray{eltype(grid)}(undef, 0)
+
+    CUDA.@allowscalar colptr[1] = 1
+
+    for k in 1:Nz, j in 1:Ny, i in 1:Nx
+        parent(eᵢⱼₖ)  .= 0
+        parent(Aeᵢⱼₖ) .= 0
+
+        CUDA.@allowscalar eᵢⱼₖ[i, j, k] = 1
+
+        fill_halo_regions!(eᵢⱼₖ)
+
+        linear_operation!(Aeᵢⱼₖ, eᵢⱼₖ, args...)
+
+        count = 0
+        for n in 1:Nz, m in 1:Ny, l in 1:Nx
+            Aeᵢⱼₖₗₘₙ = CUDA.@allowscalar Aeᵢⱼₖ[l, m, n]
+            if Aeᵢⱼₖₗₘₙ != 0
+                append!(rowval, Ny*Nx*(n-1) + Nx*(m-1) + l)
+                append!(nzval, Aeᵢⱼₖₗₘₙ)
+                count += 1
+            end
+        end
+
+        CUDA.@allowscalar colptr[Ny*Nx*(k-1) + Nx*(j-1) + i + 1] = colptr[Ny*Nx*(k-1) + Nx*(j-1) + i] + count
+    end
+
+    return CuSparseMatrixCSC(colptr, rowval, nzval, (Nx*Ny*Nz, Nx*Ny*Nz))
+end
diff --git a/src/Solvers/multigrid_solver.jl b/src/Solvers/multigrid_solver.jl
@@ -103,7 +103,7 @@ function MultigridSolver(linear_operation!::Function,
 
     (arch == GPU() && !hasamgx) && error("Multigrid on the GPU requires a linux operating system due to AMGX.jl")
 
-    matrix = initialize_matrix(arch, template_field, linear_operation!, args...)
+    matrix = compute_matrix_for_linear_operation(arch, template_field, linear_operation!, args...)
 
     return  MultigridSolver(matrix; template_field, maxiter, reltol, abstol, amg_algorithm)
 end
@@ -222,62 +222,6 @@ end
 @inline create_multilevel(::RugeStubenAMG, A) = ruge_stuben(A)
 @inline create_multilevel(::SmoothedAggregationAMG, A) = smoothed_aggregation(A)
 
-function initialize_matrix(::CPU, template_field, linear_operator!, args...)
-    Nx, Ny, Nz = size(template_field)
-    A = spzeros(eltype(template_field.grid), Nx*Ny*Nz, Nx*Ny*Nz)
-    make_column(f) = reshape(interior(f), Nx*Ny*Nz)
-
-    eᵢⱼₖ = similar(template_field)
-    ∇²eᵢⱼₖ = similar(template_field)
-
-    for k = 1:Nz, j in 1:Ny, i in 1:Nx
-        parent(eᵢⱼₖ) .= 0
-        parent(∇²eᵢⱼₖ) .= 0
-        eᵢⱼₖ[i, j, k] = 1
-        fill_halo_regions!(eᵢⱼₖ)
-        linear_operator!(∇²eᵢⱼₖ, eᵢⱼₖ, args...)
-
-        A[:, Ny*Nx*(k-1) + Nx*(j-1) + i] .= make_column(∇²eᵢⱼₖ)
-    end
-
-    return A
-end
-
-function initialize_matrix(::GPU, template_field, linear_operator!, args...)
-    Nx, Ny, Nz = size(template_field)
-    FT = eltype(template_field.grid)
-
-    make_column(f) = reshape(interior(f), Nx*Ny*Nz)
-
-    eᵢⱼₖ = similar(template_field)
-    ∇²eᵢⱼₖ = similar(template_field)
-
-    colptr = CuArray{Int}(undef, Nx*Ny*Nz+1)
-    rowval = CuArray{Int}(undef, 0)
-    nzval  = CuArray{FT}(undef, 0)
-
-    CUDA.@allowscalar colptr[1] = 1
-
-    for k = 1:Nz, j in 1:Ny, i in 1:Nx
-        parent(eᵢⱼₖ) .= 0
-        parent(∇²eᵢⱼₖ) .= 0
-        CUDA.@allowscalar eᵢⱼₖ[i, j, k] = 1
-        fill_halo_regions!(eᵢⱼₖ)
-        linear_operator!(∇²eᵢⱼₖ, eᵢⱼₖ, args...)
-        count = 0
-        for n = 1:Nz, m in 1:Ny, l in 1:Nx
-            CUDA.@allowscalar if ∇²eᵢⱼₖ[l, m, n] != 0
-                append!(rowval, Ny*Nx*(n-1) + Nx*(m-1) + l)
-                CUDA.@allowscalar append!(nzval, ∇²eᵢⱼₖ[l, m, n])
-                count += 1
-            end
-        end
-        CUDA.@allowscalar colptr[Ny*Nx*(k-1) + Nx*(j-1) + i + 1] = colptr[Ny*Nx*(k-1) + Nx*(j-1) + i] + count
-    end
-
-    return CuSparseMatrixCSC(colptr, rowval, nzval, (Nx*Ny*Nz, Nx*Ny*Nz))
-end
-
 """
     solve!(x, solver::MultigridSolver, b; kwargs...)