Restructure MPI cluster manager
Rename some functions for clarity.
Add many comments.
Clean up / correct termination handling.
Avoid blocking MPI communication in example.
Add test cases.
eschnett committed Nov 25, 2015
1 parent a454737 commit 57226be
Showing 6 changed files with 498 additions and 250 deletions.
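
For orientation, the renamed entry points (MPI.start becomes MPI.start_main_loop, and the example now shuts down via MPI.stop_main_loop) are used roughly as follows. This is a minimal sketch distilled from the updated example below, not code from the commit; the hello println is purely illustrative:

using MPI

MPI.Init()

# Start the cluster manager's main loop. With MPI_TRANSPORT_ALL (or
# TCP_TRANSPORT_ALL) this returns only on rank 0; the other ranks stay
# inside it and act as Julia workers.
manager = MPI.start_main_loop(MPI_TRANSPORT_ALL)

# Rank 0 drives the computation and can reach the workers with @mpi_do,
# remotecall_fetch, etc.
@mpi_do manager println("hello from ", MPI.Comm_rank(MPI.COMM_WORLD))

# Tear the workers down again, replacing the ad-hoc Finalize/exit handling
# the example used before this commit.
MPI.stop_main_loop(manager)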
84 changes: 42 additions & 42 deletions examples/06-cman-transport.jl
@@ -1,59 +1,59 @@
 using MPI

-comm, comm_size, rank = MPI.init_mpi()
+MPI.Init()
+rank = MPI.Comm_rank(MPI.COMM_WORLD)
+size = MPI.Comm_size(MPI.COMM_WORLD)

 include("01-hello-impl.jl")
 include("02-broadcast-impl.jl")
 include("03-reduce-impl.jl")
 include("04-sendrecv-impl.jl")

 if length(ARGS) == 0
-    print("Please specify a transport option to use [MPI|TCP]\n")
-    exit()
+    println("Please specify a transport option to use [MPI|TCP]")
+    MPI.Finalize()
+    exit(1)
 elseif ARGS[1] == "TCP"
-    manager = MPI.start(TCP_TRANSPORT_ALL) # does not return on worker
+    manager = MPI.start_main_loop(TCP_TRANSPORT_ALL) # does not return on worker
 elseif ARGS[1] == "MPI"
-    manager = MPI.start(MPI_TRANSPORT_ALL) # does not return on worker
+    manager = MPI.start_main_loop(MPI_TRANSPORT_ALL) # does not return on worker
 else
-    print("Valid transport options are [MPI|TCP]\n")
-    exit()
+    println("Valid transport options are [MPI|TCP]")
+    MPI.Finalize()
+    exit(1)
 end

-if rank == 0
-    nloops = 10^2
-    function foo(n)
-        a=ones(n)
-        remotecall_fetch(x->x, 2, a);
+# Check whether a worker accidentally returned
+@assert rank == 0

-        @elapsed for i in 1:nloops
-            remotecall_fetch(x->x, 2, a)
-        end
-    end
-
-    n=10^3
-    foo(1)
-    t=foo(n)
-    println("$t seconds for $nloops loops of send-recv of array size $n")
-
-    n=10^6
-    foo(1)
-    t=foo(n)
-    println("$t seconds for $nloops loops of send-recv of array size $n")
-
-
-    print("EXAMPLE: HELLO\n")
-    @mpi_do manager do_hello()
-    print("EXAMPLE: BROADCAST\n")
-    @mpi_do manager do_broadcast()
-    print("EXAMPLE: REDUCE\n")
-    @mpi_do manager do_reduce()
-    print("EXAMPLE: SENDRECV\n")
-    @mpi_do manager do_sendrecv()
-
-    # Abscence of a MPI Finalize causes the cluster to hang - don't yet know why
-    if ARGS[1] == "TCP"
-        @mpi_do manager MPI.Finalize()
-    elseif ARGS[1] == "MPI"
-        @everywhere (MPI.Finalize(); exit())
+nloops = 10^2
+function foo(n)
+    a=ones(n)
+    remotecall_fetch(x->x, mod1(2, size), a);
+    @elapsed for i in 1:nloops
+        remotecall_fetch(x->x, mod1(2, size), a)
     end
 end
+
+n=10^3
+foo(1)
+t=foo(n)
+println("$t seconds for $nloops loops of send-recv of array size $n")
+
+n=10^6
+foo(1)
+t=foo(n)
+println("$t seconds for $nloops loops of send-recv of array size $n")
+
+# We cannot run these examples since they use MPI.Barrier and other blocking
+# communication, disabling our event loop
+# print("EXAMPLE: HELLO\n")
+# @mpi_do manager do_hello()
+# print("EXAMPLE: BROADCAST\n")
+# @mpi_do manager do_broadcast()
+# print("EXAMPLE: REDUCE\n")
+# @mpi_do manager do_reduce()
+# print("EXAMPLE: SENDRECV\n")
+# @mpi_do manager do_sendrecv()
+
+MPI.stop_main_loop(manager)
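
With either transport the script is presumably launched under an MPI launcher, with the transport selected by its single command-line argument. The launcher invocation and rank count below are illustrative, not taken from the commit:

mpirun -np 4 julia examples/06-cman-transport.jl MPI    # Julia workers communicate over MPI
mpirun -np 4 julia examples/06-cman-transport.jl TCP    # MPI only launches the ranks; workers talk over TCP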
