Add tests for all collectives.
This changes test_correctness to test_allreduce for consistency.

New argument passing has been added to test_utils.

Note that the allgather tests currently fail (see #22).
ndryden committed Nov 23, 2018
1 parent c72bfa5 commit 5ab2991
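The new argument handling mentioned in the message is used uniformly by the test mains below as parse_args(argc, argv, backend, start_size, max_size). The test_utils changes themselves are not among the files shown here, so the following is only a minimal sketch of what such a helper could look like, assuming positional arguments (backend, then start size, then max size); the committed implementation may differ.

// Hypothetical sketch only: the real parse_args lives in the updated test_utils
// headers, which are not part of the files shown in this view.
// Assumed usage: test_foo.exe [backend [start_size [max_size]]]
#include <string>

void parse_args(int argc, char** argv, std::string& backend,
                size_t& start_size, size_t& max_size) {
  if (argc >= 2) {
    backend = argv[1];                 // e.g. MPI, NCCL, or MPI-CUDA
  }
  if (argc >= 3) {
    start_size = std::stoul(argv[2]);  // smallest message size to test
  }
  if (argc >= 4) {
    max_size = std::stoul(argv[3]);    // largest message size to test
  }
}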
Showing 11 changed files with 1,495 additions and 121 deletions.
99 changes: 23 additions & 76 deletions test/CMakeLists.txt
@@ -1,8 +1,3 @@
# The name changed in CMake 3.10
if (NOT MPIEXEC_EXECUTABLE AND MPIEXEC)
set(MPIEXEC_EXECUTABLE ${MPIEXEC})
endif ()

set_full_path(TEST_HEADERS
test_utils.hpp)
if (AL_HAS_CUDA)
@@ -23,78 +18,30 @@ target_sources(aluminum_test_headers INTERFACE "${TEST_HEADERS}")
target_include_directories(
aluminum_test_headers INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}")

add_executable(TestCorrectness.exe test_correctness.cpp ${TEST_HEADERS})
target_link_libraries(TestCorrectness.exe PRIVATE Al)
if (AL_HAS_CUDA)
target_link_libraries(TestCorrectness.exe PUBLIC cuda)
endif ()

# This is mostly a sanity check
set(TEST_ARGS MPI 8)
add_test(NAME TestCorrectness
COMMAND $<TARGET_FILE:TestCorrectness.exe> ${TEST_ARGS})
set(TEST_SRCS
test_allreduce.cpp
test_reduce.cpp
test_reduce_scatter.cpp
test_allgather.cpp
test_alltoall.cpp
test_bcast.cpp
test_gather.cpp
test_scatter.cpp
test_multi_nballreduces.cpp
test_nccl_collectives.cpp)

foreach(src ${TEST_SRCS})
string(REPLACE ".cpp" ".exe" _test_exe_name "${src}")
add_executable(${_test_exe_name} ${src})
target_link_libraries(${_test_exe_name} PRIVATE Al aluminum_test_headers)
if (AL_HAS_CUDA)
target_link_libraries(${_test_exe_name} PUBLIC cuda)
endif()
endforeach()

if (MPIEXEC_EXECUTABLE)
add_test(NAME TestCorrectness_np4
COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 4
${MPIEXEC_PREFLAGS}
$<TARGET_FILE:TestCorrectness.exe>
${MPIEXEC_POSTFLAGS} ${TEST_ARGS})
endif ()

add_executable(TestMultiNBAllReduces.exe
test_multi_nballreduces.cpp ${TEST_HEADERS})
target_link_libraries(TestMultiNBAllReduces.exe PRIVATE Al)
if (AL_HAS_CUDA)
target_link_libraries(TestMultiNBAllReduces.exe PUBLIC cuda)
endif ()

set(TEST_ARGS "8")
add_test(NAME TestMultiNBAllReduces
COMMAND $<TARGET_FILE:TestMultiNBAllReduces.exe> ${TEST_ARGS})

if (MPIEXEC_EXECUTABLE)
add_test(NAME TestMultiNBAllReduces_np4
COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 4
${MPIEXEC_PREFLAGS}
$<TARGET_FILE:TestMultiNBAllReduces.exe>
${MPIEXEC_POSTFLAGS} ${TEST_ARGS})
endif ()

add_executable(TestNCCLCollectives.exe
test_nccl_collectives.cpp ${TEST_HEADERS})
target_link_libraries(TestNCCLCollectives.exe PRIVATE Al)
if (AL_HAS_CUDA)
target_link_libraries(TestNCCLCollectives.exe PUBLIC cuda)
endif ()

set(TEST_ARGS "8")
add_test(NAME TestNCCLCollectives
COMMAND $<TARGET_FILE:TestNCCLCollectives.exe> ${TEST_ARGS})

if (MPIEXEC_EXECUTABLE)
add_test(NAME TestNCCLCollectives_np4
COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 4
${MPIEXEC_PREFLAGS}
$<TARGET_FILE:TestNCCLCollectives.exe>
${MPIEXEC_POSTFLAGS} ${TEST_ARGS})
endif ()

if (AL_HAS_CUDA)
add_executable(TestStreamMemOps.exe
add_executable(test_stream_mem_ops.exe
test_stream_mem_ops.cpp ${TEST_HEADERS})
target_link_libraries(TestStreamMemOps.exe PRIVATE Al)
target_link_libraries(TestStreamMemOps.exe PUBLIC cuda)

set(TEST_ARGS "8")
add_test(NAME TestStreamMemOps
COMMAND $<TARGET_FILE:TestStreamMemOps.exe> ${TEST_ARGS})

if (MPIEXEC_EXECUTABLE)
add_test(NAME TestStreamMemOps_np4
COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 4
${MPIEXEC_PREFLAGS}
$<TARGET_FILE:TestStreamMemOps.exe>
${MPIEXEC_POSTFLAGS} ${TEST_ARGS})
endif ()
target_link_libraries(test_stream_mem_ops.exe PRIVATE Al)
target_link_libraries(test_stream_mem_ops.exe PUBLIC cuda)
endif ()
178 changes: 178 additions & 0 deletions test/test_allgather.cpp
@@ -0,0 +1,178 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018, Lawrence Livermore National Security, LLC. Produced at the
// Lawrence Livermore National Laboratory in collaboration with University of
// Illinois Urbana-Champaign.
//
// Written by the LBANN Research Team (N. Dryden, N. Maruyama, et al.) listed in
// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
//
// LLNL-CODE-756777.
// All rights reserved.
//
// This file is part of Aluminum GPU-aware Communication Library. For details, see
// http://software.llnl.gov/Aluminum or https://github.com/LLNL/Aluminum.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////

#include <iostream>
#include "Al.hpp"
#include "test_utils.hpp"
#ifdef AL_HAS_NCCL
#include "test_utils_nccl_cuda.hpp"
#endif
#ifdef AL_HAS_MPI_CUDA
#include "test_utils_mpi_cuda.hpp"
#endif

#include <stdlib.h>
#include <math.h>
#include <string>

// Size is the per-rank send size.
size_t start_size = 1;
size_t max_size = 1<<30;

/**
* Test allgather algo on input, check with expected.
*/
template <typename Backend>
void test_allgather_algo(const typename VectorType<Backend>::type& expected,
const typename VectorType<Backend>::type& expected_inplace,
typename VectorType<Backend>::type input,
typename VectorType<Backend>::type input_inplace,
typename Backend::comm_type& comm,
typename Backend::algo_type algo) {
auto recv = get_vector<Backend>(input.size() * comm.size());
// Test regular allgather.
Al::Allgather<Backend>(input.data(), recv.data(), input.size(), comm, algo);
if (!check_vector(expected, recv)) {
std::cout << comm.rank() << ": regular allgather does not match" <<
std::endl;
std::abort();
}
MPI_Barrier(MPI_COMM_WORLD);
// Test in-place allgather.
std::stringstream ss;
ss << comm.rank() << ": input: ";
for (const auto& v : input_inplace.copyout()) ss << v << " ";
std::cout << ss.str() << std::endl;
Al::Allgather<Backend>(input_inplace.data(), input_inplace.size() / comm.size(),
comm, algo);
MPI_Barrier(MPI_COMM_WORLD);
if (!check_vector(expected_inplace, input_inplace)) {
std::cout << comm.rank() << ": in-place allgather does not match" <<
std::endl;
std::abort();
}
}

/**
* Test non-blocking allgather algo on input, check with expected.
*/
template <typename Backend>
void test_nb_allgather_algo(const typename VectorType<Backend>::type& expected,
const typename VectorType<Backend>::type& expected_inplace,
typename VectorType<Backend>::type input,
typename VectorType<Backend>::type input_inplace,
typename Backend::comm_type& comm,
typename Backend::algo_type algo) {
typename Backend::req_type req = get_request<Backend>();
auto recv = get_vector<Backend>(input.size() * comm.size());
// Test regular allgather.
Al::NonblockingAllgather<Backend>(input.data(), recv.data(),
input.size(), comm, req, algo);
Al::Wait<Backend>(req);
if (!check_vector(expected, recv)) {
std::cout << comm.rank() << ": regular allgather does not match" <<
std::endl;
std::abort();
}
MPI_Barrier(MPI_COMM_WORLD);
// Test in-place allgather.
Al::NonblockingAllgather<Backend>(input_inplace.data(),
input_inplace.size() / comm.size(),
comm, req, algo);
Al::Wait<Backend>(req);
if (!check_vector(expected_inplace, input_inplace)) {
std::cout << comm.rank() << ": in-place allgather does not match" <<
std::endl;
std::abort();
}
}

template <typename Backend>
void test_correctness() {
auto algos = get_allgather_algorithms<Backend>();
auto nb_algos = get_nb_allgather_algorithms<Backend>();
typename Backend::comm_type comm; // Use COMM_WORLD.
// Compute sizes to test.
std::vector<size_t> sizes = get_sizes(start_size, max_size, true);
for (const auto& size : sizes) {
if (comm.rank() == 0) {
std::cout << "Testing size " << human_readable_size(size) << std::endl;
}
// Compute true value.
size_t global_size = size * comm.size();
typename VectorType<Backend>::type &&data = gen_data<Backend>(size);
auto expected = get_vector<Backend>(global_size);
get_expected_allgather_result(data, expected);
typename VectorType<Backend>::type &&data_inplace = gen_data<Backend>(global_size);
auto expected_inplace(data_inplace);
get_expected_allgather_inplace_result(expected_inplace);
// Test algorithms.
for (auto&& algo : algos) {
MPI_Barrier(MPI_COMM_WORLD);
if (comm.rank() == 0) {
std::cout << " Algo: " << Al::allreduce_name(algo) << std::endl;
}
test_allgather_algo<Backend>(expected, expected_inplace,
data, data_inplace, comm, algo);
}
for (auto&& algo : nb_algos) {
MPI_Barrier(MPI_COMM_WORLD);
if (comm.rank() == 0) {
std::cout << " Algo: NB " << Al::allreduce_name(algo) << std::endl;
}
test_nb_allgather_algo<Backend>(expected, expected_inplace,
data, data_inplace, comm, algo);
}
}
}

int main(int argc, char** argv) {
// Need to set the CUDA device before initializing Aluminum.
#ifdef AL_HAS_CUDA
set_device();
#endif
Al::Initialize(argc, argv);

std::string backend = "MPI";
parse_args(argc, argv, backend, start_size, max_size);

if (backend == "MPI") {
std::cerr << "Allgather not supported on MPI backend." << std::endl;
std::abort();
#ifdef AL_HAS_NCCL
} else if (backend == "NCCL") {
test_correctness<Al::NCCLBackend>();
#endif
#ifdef AL_HAS_MPI_CUDA
} else if (backend == "MPI-CUDA") {
test_correctness<Al::MPICUDABackend>();
#endif
}

Al::Finalize();
return 0;
}
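The expected-value helpers called above (get_expected_allgather_result and get_expected_allgather_inplace_result) come from the test_utils headers, which this commit view does not show. As a rough sketch, the regular variant could follow the same MPI-based pattern as the get_expected_result helper removed from test_correctness.cpp below; the signature and the handling of device-memory vectors for the CUDA backends are assumptions here.

// Sketch only: assumed helper mirroring the MPI-based expected-result pattern
// used by the allreduce test. Gathers each rank's input so every rank holds the
// full expected allgather result in `expected` (host float vectors assumed).
#include <vector>
#include <mpi.h>

void get_expected_allgather_result(std::vector<float>& input,
                                   std::vector<float>& expected) {
  MPI_Allgather(input.data(), input.size(), MPI_FLOAT,
                expected.data(), input.size(), MPI_FLOAT, MPI_COMM_WORLD);
}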
34 changes: 4 additions & 30 deletions test/test_correctness.cpp → test/test_allreduce.cpp
@@ -39,13 +39,9 @@
#include <math.h>
#include <string>

size_t start_size = 1;
size_t max_size = 1<<30;

void get_expected_result(std::vector<float>& expected) {
MPI_Allreduce(MPI_IN_PLACE, expected.data(), expected.size(),
MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
}

/**
* Test allreduce algo on input, check with expected.
*/
@@ -113,22 +109,15 @@ void test_correctness() {
auto nb_algos = get_nb_allreduce_algorithms<Backend>();
typename Backend::comm_type comm; // Use COMM_WORLD.
// Compute sizes to test.
std::vector<size_t> sizes = {0};
for (size_t size = 1; size <= max_size; size *= 2) {
sizes.push_back(size);
// Avoid duplicating 2.
if (size > 1) {
sizes.push_back(size + 1);
}
}
std::vector<size_t> sizes = get_sizes(start_size, max_size, true);
for (const auto& size : sizes) {
if (comm.rank() == 0) {
std::cout << "Testing size " << human_readable_size(size) << std::endl;
}
// Compute true value.
typename VectorType<Backend>::type &&data = gen_data<Backend>(size);
auto expected(data);
get_expected_result(expected);
get_expected_allreduce_result(expected);
// Test algorithms.
for (auto&& algo : algos) {
MPI_Barrier(MPI_COMM_WORLD);
@@ -155,12 +144,7 @@ int main(int argc, char** argv) {
Al::Initialize(argc, argv);

std::string backend = "MPI";
if (argc >= 2) {
backend = argv[1];
}
if (argc == 3) {
max_size = std::stoul(argv[2]);
}
parse_args(argc, argv, backend, start_size, max_size);

if (backend == "MPI") {
test_correctness<Al::MPIBackend>();
@@ -172,16 +156,6 @@ } else if (backend == "MPI-CUDA") {
} else if (backend == "MPI-CUDA") {
test_correctness<Al::MPICUDABackend>();
#endif
} else {
std::cerr << "usage: " << argv[0] << " [MPI";
#ifdef AL_HAS_NCCL
std::cerr << " | NCCL";
#endif
#ifdef AL_HAS_MPI_CUDA
std::cerr << " | MPI-CUDA";
#endif
std::cerr << "]" << std::endl;
return -1;
}

Al::Finalize();
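The get_expected_allreduce_result call introduced above presumably replaces the get_expected_result helper deleted from this file, relocated into test_utils so every collective test can share the same pattern. Under that assumption, the helper would keep the deleted body:

// Sketch, assuming the helper simply keeps the removed get_expected_result body:
// an in-place summing MPI_Allreduce over the host copy of the data.
#include <vector>
#include <mpi.h>

void get_expected_allreduce_result(std::vector<float>& expected) {
  MPI_Allreduce(MPI_IN_PLACE, expected.data(), expected.size(),
                MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
}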