# Setup

In [1]:
# Activate the project environment stored next to this notebook, then list its packages so
# the exact dependency versions are recorded in the cell output.
import Pkg
Pkg.activate(@__DIR__)
Pkg.status()

[32m[1m  Activating[22m[39m project at `/global/u1/b/blaschke/juliacon24-hpcworkshop/parts/mpi/explanation`


[32m[1mStatus[22m[39m `/global/u1/b/blaschke/juliacon24-hpcworkshop/parts/mpi/explanation/Project.toml`
  [90m[1520ce14] [39mAbstractTrees v0.4.5
  [90m[052768ef] [39mCUDA v5.4.2
  [90m[adafc99b] [39mCpuId v0.3.1
  [90m[0e44f5e4] [39mHwloc v3.0.1
  [90m[da04e1cc] [39mMPI v0.20.20
  [90m[e7922434] [39mMPIClusterManagers v0.2.4
  [90m[6f74fd91] [39mNetworkInterfaceControllers v0.1.0


In [2]:
using MPI

using NetworkInterfaceControllers, Sockets
# Enumerate this node's network interfaces, queried with the IPv4 address type.
interfaces = NetworkInterfaceControllers.get_interface_data(IPv4)

# Pick the single IPv4 entry named "hsn0:chn"; `only` throws unless exactly one matches.
hsn0_public = only(filter(interfaces) do nic
    nic.name == "hsn0:chn" && nic.version == :v4
end)
# Resolve that address back to a host name.
public_slingshot_name = getnameinfo(hsn0_public.ip)

"nid200360-hsn0"

In [3]:
# MPIClusterManagers supplies MPIWorkerManager and @mpi_do.
using MPIClusterManagers

# Distributed supplies addprocs().
using Distributed

# Manager configured for 4 MPI workers.
manager = MPIWorkerManager(4)

# Start the MPI workers and register them as Julia workers as well. The workers are launched
# with the currently active project, and `master_tcp_interface` is set to the host name
# resolved for the hsn0 interface.
addprocs(
    manager;
    exeflags=`--project=$(Base.active_project())`,
    master_tcp_interface=public_slingshot_name,
)

4-element Vector{Int64}:
 2
 3
 4
 5

In [5]:
@mpi_do manager begin
    using MPI: MPI, Comm, Win, free
    # Host name of the node this rank runs on.
    name = gethostname()
    # World communicator, the total number of ranks, and this rank's id within it.
    # The count is called `mpi_size` (not `size`) so that Base's `size` function stays usable.
    comm = MPI.COMM_WORLD
    mpi_size = MPI.Comm_size(comm)
    rank = MPI.Comm_rank(comm)
    println("Hello world, I am $(rank) of $(mpi_size) on $(name)")
end

      From worker 5:	Hello world, I am 3 of 4 on nid200365
      From worker 2:	Hello world, I am 0 of 4 on nid200360
      From worker 4:	Hello world, I am 2 of 4 on nid200364
      From worker 3:	Hello world, I am 1 of 4 on nid200361


KeyError: KeyError: key "usage_request" not found

KeyError: KeyError: key "usage_request" not found

In [6]:
@mpi_do manager begin
    # Ask MPI for a balanced 1D process grid covering all `mpi_size` ranks. A 0 entry means
    # "let MPI choose the extent of this dimension"; the completed extents come back as a
    # new vector. `MPI.Dims_create` is the non-deprecated replacement for the in-place
    # `MPI.Dims_create!`.
    dims = MPI.Dims_create(mpi_size, [0])
end

In [7]:
@mpi_do manager begin
    # Build a 1D Cartesian communicator on top of `comm`. The keyword form of
    # `MPI.Cart_create` is used, matching the keyword-style MPI.jl API used for the
    # point-to-point calls in this notebook (the positional form is deprecated).
    comm_cart = MPI.Cart_create(
        comm,              # MPI communicator the grid is derived from
        dims;              # Dimensions of grid
        periodic=[false],  # per dimension: false == not periodic, true == periodic
        reorder=true,      # false == not allowed to reorder ranks, true == allowed to reorder
    )
    # This rank's id and grid coordinates inside the Cartesian communicator.
    me        = MPI.Comm_rank(comm_cart)
    coords    = MPI.Cart_coords(comm_cart)
    # Ranks of the two neighbors along dimension 0; on a non-periodic grid a missing
    # neighbor is reported as MPI.PROC_NULL.
    neighbors = MPI.Cart_shift(
        comm_cart,
        0,  # Which dimension to shift (zero-indexed)
        1,  # Shift magnitude
    )
end

## Halo Exchange

When cast into discrete form, the 1D diffusion equation becomes:

$$
\partial_t x = -\mathrm{div}(q), \qquad q = -D \, \mathrm{grad}(x) \\
\Delta_t x = -\frac{q_i - q_{i-1}}{\Delta s} = D \frac{(x_{i+1} - x_i) - (x_{i} - x_{i-1})}{(\Delta s)^2} = D \frac{x_{i+1} - 2 x_i + x_{i-1}}{(\Delta s)^2}
$$

The diffusion equation has a stencil width of 2, but the necessary halo only needs 1 cell to be transferred:

![1D_halo](l8_1D_global_grid.png)

In 2D this will look as follows:

![2D_halo](diffusion_2d_halo_exchange.png)

In the previous example we exchanged `Int64` values; now we are going to transfer `Float64` values.

In [8]:
@mpi_do manager begin
    # One-element Float64 staging buffers for the halo exchange. The `_1` buffers are used
    # with `neighbors[1]` and the `_2` buffers with `neighbors[2]`.
    send_1, send_2 = zeros(Float64, 1), zeros(Float64, 1)
    recv_1, recv_2 = zeros(Float64, 1), zeros(Float64, 1)
end

In [244]:
@mpi_do manager begin
    # halo_exchange!(A)
    #
    # Exchange the one-cell halos of column 1 of `A` with the two neighbors stored in
    # `neighbors`, communicating over `comm_cart`:
    #   * `A[2, 1]`     is sent to `neighbors[1]`; the value received from it lands in `A[1, 1]`
    #   * `A[end-1, 1]` is sent to `neighbors[2]`; the value received from it lands in `A[end, 1]`
    # A side whose neighbor is `MPI.PROC_NULL` is skipped entirely, so that boundary cell of
    # `A` is left untouched. Uses the worker-global buffers `send_1`, `send_2`, `recv_1`,
    # `recv_2`. Returns `A`.
    function halo_exchange!(A)
        has_first  = neighbors[1] != MPI.PROC_NULL
        has_second = neighbors[2] != MPI.PROC_NULL
        # Stage the outgoing cells in the send buffers (scalar copies, no temporary slices)
        has_first  && (send_1[1] = A[2, 1])
        has_second && (send_2[1] = A[end-1, 1])
        # One request slot per potential transfer; slots that are never used stay inactive
        reqs = MPI.MultiRequest(4)
        # Post the receives first ...
        has_first  && MPI.Irecv!(recv_1, comm_cart, reqs[1]; source=neighbors[1])
        has_second && MPI.Irecv!(recv_2, comm_cart, reqs[2]; source=neighbors[2])
        # ... then the matching sends
        has_first  && MPI.Isend(send_1, comm_cart, reqs[3]; dest=neighbors[1])
        has_second && MPI.Isend(send_2, comm_cart, reqs[4]; dest=neighbors[2])
        # Block until all transfers are done before touching the buffers again
        MPI.Waitall(reqs)
        # Write the received values into the halo cells of A
        has_first  && (A[1, 1] = recv_1[1])
        has_second && (A[end, 1] = recv_2[1])
        return A
    end
end

In [245]:
@mpi_do manager begin
    # Parameters of the diffusion example, defined on every rank.
    D  = 1e-4            # coefficient D of the diffusion equation
    ds = 1e-4            # grid spacing
    dt = ds^2 / D / 8.2  # time step, chosen so that D * dt / ds^2 is about 1 / 8.2

    # Flux between cells `ix` and `ix + 1` of column 1 of `C`:
    # -D times the forward difference divided by `ds`.
    function qx(ix, D, C, ds)
        return -D * (C[ix+1, 1] - C[ix, 1]) / ds
    end
end

In [246]:
@mpi_do manager begin
    # Each rank holds a 10×1 block of the field, initially all zeros.
    C = zeros(10, 1)
    # Only rank 1 seeds a spike of height 1/ds, placed in its cell 8.
    rank == 1 && (C[8] = 1 / ds)
end

In [247]:
@mpi_do manager begin
    # Print every rank's local block of the field (here: the initial condition).
    # The lines show up tagged by worker id, not sorted by rank.
    println(C)
end

      From worker 2:	[0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0;;]
      From worker 4:	[0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0;;]
      From worker 5:	[0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0;;]
      From worker 3:	[0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 10000.0; 0.0; 0.0;;]


In [248]:
@mpi_do manager begin
    # step_diffusion!(C2, C)
    #
    # Advance the interior cells of `C` by one explicit time step and store the result in
    # `C2`. The first and last cells of `C2` are not written here. Reads the worker-global
    # parameters `dt`, `D` and `ds` and the flux helper `qx`. Returns `nothing`.
    function step_diffusion!(C2, C)
        last_interior = size(C, 1) - 1
        for i in 2:last_interior
            q_left  = qx(i - 1, D, C, ds)  # flux between cells i-1 and i
            q_right = qx(i, D, C, ds)      # flux between cells i and i+1
            C2[i] = C[i] - dt * (q_right - q_left) / ds
        end
        return nothing
    end
end

In [249]:
@mpi_do manager begin
    # Second field buffer with the same shape and element type as `C`, explicitly zeroed
    # because `similar` does not initialise its contents.
    C2 = fill!(similar(C), 0.0)
end

In [250]:
@mpi_do manager begin
    # One full time step on every rank:
    # 1. compute the updated interior cells from C into C2,
    step_diffusion!(C2, C)
    # 2. exchange C2's edge cells with the neighboring ranks to fill its halo cells,
    halo_exchange!(C2)
    # 3. swap the bindings so `C` names the new state and `C2` the old one (no data copied).
    C, C2 = C2, C
end

In [252]:
@mpi_do manager begin
    # Print every rank's local block of the field after the time step and halo exchange.
    # The lines show up tagged by worker id, not sorted by rank.
    println(C)
end

      From worker 5:	[0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0;;]
      From worker 4:	[1219.5121951219512; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0;;]
      From worker 2:	[0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 0.0;;]
      From worker 3:	[0.0; 0.0; 0.0; 0.0; 0.0; 0.0; 1219.5121951219512; 7560.975609756098; 1219.5121951219512; 0.0;;]


In [253]:
# NOTE(review): looks like a quick check that the notebook kernel still responds; it is not
# part of the MPI example — confirm whether this cell can be dropped.
1+1

2