diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..735e1fd
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,263 @@
+cmake_minimum_required(VERSION 3.16)
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_DISABLE_SOURCE_CHANGES ON)
+set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
+if (NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE RelWithDebInfo)
+endif()
+
+# User-defined build options (the *_HOME paths default to empty, i.e. "use the NVHPC-provided library")
+option(CUDECOMP_BUILD_FORTRAN "Build Fortran bindings" ON)
+option(CUDECOMP_ENABLE_NVTX "Enable NVTX ranges" ON)
+option(CUDECOMP_ENABLE_NVSHMEM "Enable NVSHMEM" OFF)
+option(CUDECOMP_BUILD_EXTRAS "Build benchmark, examples, and tests" OFF)
+set(CUDECOMP_CUDA_CC_LIST "70;80;90" CACHE STRING "List of CUDA compute capabilities to build cuDecomp for.")
+set(CUDECOMP_NCCL_HOME "" CACHE STRING "Path to search for NCCL installation. Use to override NVHPC provided NCCL version.")
+set(CUDECOMP_NVSHMEM_HOME "" CACHE STRING "Path to search for NVSHMEM installation. Use to override NVHPC provided NVSHMEM version.")
+
+# Use NVHPC compilers by default
+set(CMAKE_CXX_COMPILER "nvc++")
+set(CMAKE_Fortran_COMPILER "nvfortran")
+
+# Locate and use NVHPC CMake configuration (derived from the nvc++ location: <sdk>/compilers/bin/nvc++ -> <sdk>/cmake)
+find_program(NVHPC_CXX_BIN "nvc++")
+string(REPLACE "compilers/bin/nvc++" "cmake" NVHPC_CMAKE_DIR "${NVHPC_CXX_BIN}")
+set(CMAKE_PREFIX_PATH ${NVHPC_CMAKE_DIR})
+
+if (CUDECOMP_BUILD_FORTRAN)
+  set(LANGS CXX CUDA Fortran)
+else()
+  set(LANGS CXX CUDA)
+endif()
+
+project(cudecomp LANGUAGES ${LANGS})
+
+# Detect if Cray compiler wrappers are available to assess if in Cray environment.
+# We do not use the Cray compiler wrappers directly for greater flexibility.
+find_program(CRAY_CC_BIN "CC")
+
+if (CRAY_CC_BIN)
+  message(STATUS "Found Cray CC wrapper. Compiling for Cray programming environment.")
+endif()
+
+# MPI
+find_package(MPI REQUIRED)
+
+if (CRAY_CC_BIN)
+  # FindMPI does not include Cray GTL (e.g. CUDA-aware) libs
+  # automatically in Cray environment. Locate it to include in linking.
+  string(REPLACE ":" ";" CRAY_LIB_PATHS "$ENV{CRAY_LD_LIBRARY_PATH}")  # quoted: an unset variable yields an empty list, not an argument-count error
+  find_library(CRAY_MPI_GTL_CUDA_LIBRARY REQUIRED
+    NAMES mpi_gtl_cuda
+    HINTS ${CRAY_LIB_PATHS}
+  )
+
+  # Cray GTL libs benefit from linking against gdrcopy, so also
+  # locating that library.
+  find_library(GDRCOPY_LIBRARY REQUIRED
+    NAMES gdrapi
+  )
+
+  message(STATUS "Found Cray GTL library: " ${CRAY_MPI_GTL_CUDA_LIBRARY})
+  message(STATUS "Found GDRCopy library: " ${GDRCOPY_LIBRARY})
+endif()
+
+# TODO: Check for MPICH to define `-DMPICH` flag
+
+# HPC SDK
+find_package(NVHPC REQUIRED COMPONENTS CUDA MATH)
+
+# Set up required include directory flags, NVHPC CMake config only defined library directories
+string(REPLACE "/lib64" "/include" NVHPC_CUDA_INCLUDE_DIR ${NVHPC_CUDA_LIBRARY_DIR})
+string(REPLACE "/lib64" "/include" NVHPC_CUFFT_INCLUDE_DIR ${NVHPC_MATH_LIBRARY_DIR})
+string(REPLACE "/lib64" "/include" NVHPC_CUTENSOR_INCLUDE_DIR ${NVHPC_MATH_LIBRARY_DIR})
+
+# Get NCCL library (with optional override)
+if (CUDECOMP_NCCL_HOME)
+  find_path(NCCL_INCLUDE_DIR REQUIRED
+    NAMES nccl.h
+    HINTS ${CUDECOMP_NCCL_HOME}/include
+  )
+
+  find_library(NCCL_LIBRARY REQUIRED
+    NAMES nccl
+    HINTS ${CUDECOMP_NCCL_HOME}/lib
+  )
+else()
+  find_package(NVHPC REQUIRED COMPONENTS NCCL)
+  find_library(NCCL_LIBRARY
+    NAMES nccl
+    HINTS ${NVHPC_NCCL_LIBRARY_DIR}
+  )
+  string(REPLACE "/lib" "/include" NCCL_INCLUDE_DIR ${NVHPC_NCCL_LIBRARY_DIR})
+endif()
+
+message(STATUS "Using NCCL library: ${NCCL_LIBRARY}")
+
+if (CUDECOMP_ENABLE_NVSHMEM)
+  # Get NVSHMEM library (with optional override)
+  if (CUDECOMP_NVSHMEM_HOME)
+    find_path(NVSHMEM_INCLUDE_DIR REQUIRED
+      NAMES nvshmem.h
+      HINTS ${CUDECOMP_NVSHMEM_HOME}/include
+    )
+
+    find_path(NVSHMEM_LIBRARY_DIR REQUIRED
+      NAMES libnvshmem.a
+      HINTS ${CUDECOMP_NVSHMEM_HOME}/lib
+    )
+  else()
+    find_package(NVHPC REQUIRED COMPONENTS NVSHMEM)
+    set(NVSHMEM_LIBRARY_DIR ${NVHPC_NVSHMEM_LIBRARY_DIR})
+    string(REPLACE "/lib" "/include" NVSHMEM_INCLUDE_DIR ${NVHPC_NVSHMEM_LIBRARY_DIR})
+  endif()
+
+  message(STATUS "Using NVSHMEM installation at: ${NVSHMEM_LIBRARY_DIR}")
+
+endif()
+
+# Building cuDecomp shared lib
+add_library(cudecomp SHARED)
+set_target_properties(cudecomp PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+
+# Set NVCC flags for requested compute capability (CUDA_CC_FLAGS is also read by the subdirectories added below)
+if (CMAKE_VERSION VERSION_LESS 3.18)
+  foreach(CUDA_CC ${CUDECOMP_CUDA_CC_LIST})
+    list(APPEND CUDA_CC_FLAGS -gencode=arch=compute_${CUDA_CC},code=sm_${CUDA_CC})
+  endforeach()
+  target_compile_options(cudecomp PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_CC_FLAGS}>")  # -gencode flags go to CUDA sources only
+else()
+  set_target_properties(cudecomp PROPERTIES CUDA_ARCHITECTURES "${CUDECOMP_CUDA_CC_LIST}")
+endif()
+target_sources(cudecomp
+  PRIVATE
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/autotune.cc
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/cudecomp_kernels.cu
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/cudecomp.cc
+)
+
+target_include_directories(cudecomp
+  PRIVATE
+  ${CMAKE_CURRENT_SOURCE_DIR}/include
+  ${MPI_CXX_INCLUDE_DIRS}
+  ${NVHPC_CUDA_INCLUDE_DIR}
+  ${NVHPC_CUTENSOR_INCLUDE_DIR}
+  ${NCCL_INCLUDE_DIR}
+)
+
+target_link_libraries(cudecomp PUBLIC NVHPC::CUDART)
+target_link_libraries(cudecomp PUBLIC MPI::MPI_CXX)
+target_link_libraries(cudecomp PRIVATE NVHPC::CUTENSOR)
+target_link_libraries(cudecomp PRIVATE ${NCCL_LIBRARY})
+if (CRAY_CC_BIN)
+  # In Cray environments, add links to GTL and GDRCopy libs for CUDA-aware support
+  target_link_libraries(cudecomp PRIVATE ${CRAY_MPI_GTL_CUDA_LIBRARY})
+  target_link_libraries(cudecomp PRIVATE ${GDRCOPY_LIBRARY})
+endif()
+
+if (CUDECOMP_ENABLE_NVTX)
+  target_compile_definitions(cudecomp PRIVATE ENABLE_NVTX)
+endif()
+
+if (CUDECOMP_ENABLE_NVSHMEM)
+  target_compile_definitions(cudecomp PRIVATE ENABLE_NVSHMEM)
+  target_include_directories(cudecomp
+    PRIVATE
+    ${NVSHMEM_INCLUDE_DIR}
+  )
+
+  # Get NVSHMEM version from header (the header lives either directly in include/ or in include/common/)
+  if (EXISTS ${NVSHMEM_INCLUDE_DIR}/nvshmem_version.h)
+    file(READ ${NVSHMEM_INCLUDE_DIR}/nvshmem_version.h NVSHMEM_VERSION_RAW)
+  else()
+    file(READ ${NVSHMEM_INCLUDE_DIR}/common/nvshmem_version.h NVSHMEM_VERSION_RAW)
+  endif()
+  string(REGEX MATCH "NVSHMEM_VENDOR_MAJOR_VERSION ([0-9]*)" _ "${NVSHMEM_VERSION_RAW}")  # quoted: pass the file content as one argument
+  list(APPEND NVSHMEM_VERSION ${CMAKE_MATCH_1})
+  string(REGEX MATCH "NVSHMEM_VENDOR_MINOR_VERSION ([0-9]*)" _ "${NVSHMEM_VERSION_RAW}")
+  list(APPEND NVSHMEM_VERSION ${CMAKE_MATCH_1})
+  list(JOIN NVSHMEM_VERSION "." NVSHMEM_VERSION)
+
+  if (NVSHMEM_VERSION VERSION_LESS "2.7")
+    # NVSHMEM versions before 2.7 will export NCCL symbols erroneously, need to define this flag
+    target_compile_definitions(cudecomp PRIVATE NVSHMEM_USE_NCCL)
+  endif()
+
+  if (NVSHMEM_VERSION VERSION_LESS "2.5")
+    target_link_libraries(cudecomp PRIVATE ${NVSHMEM_LIBRARY_DIR}/libnvshmem.a)
+  else()
+    target_link_libraries(cudecomp PRIVATE ${NVSHMEM_LIBRARY_DIR}/libnvshmem_host.so)
+    target_link_libraries(cudecomp PRIVATE ${NVSHMEM_LIBRARY_DIR}/libnvshmem_device.a)
+    target_link_libraries(cudecomp PUBLIC -L${NVHPC_CUDA_LIBRARY_DIR}/stubs -lnvidia-ml)
+  endif()
+  target_link_libraries(cudecomp PUBLIC -L${NVHPC_CUDA_LIBRARY_DIR}/stubs -lcuda)
+  set_target_properties(cudecomp PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+  set_target_properties(cudecomp PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+endif()
+
+set_target_properties(cudecomp PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/cudecomp.h)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/cudecomp.h ${CMAKE_BINARY_DIR}/include/cudecomp.h COPYONLY)  # stage the header verbatim, no @VAR@/${VAR} substitution
+
+install(
+  TARGETS cudecomp
+  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
+  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_PREFIX}/include
+)
+
+# Building Fortran shared lib and module
+if (CUDECOMP_BUILD_FORTRAN)
+  # Creating -gpu argument string for Fortran files, e.g. "cc70,cc80,cc90,cuda<version>" (also read by the Fortran subdirectories)
+  foreach(CUDA_CC ${CUDECOMP_CUDA_CC_LIST})
+    list(APPEND CUF_GPU_ARG "cc${CUDA_CC}")
+  endforeach()
+  list(APPEND CUF_GPU_ARG "cuda${NVHPC_CUDA_VERSION}")
+  list(JOIN CUF_GPU_ARG "," CUF_GPU_ARG)
+
+  add_library(cudecomp_fort SHARED)
+  set_target_properties(cudecomp_fort PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+  set_target_properties(cudecomp_fort PROPERTIES Fortran_MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/include)
+  set_target_properties(cudecomp_fort PROPERTIES LINKER_LANGUAGE Fortran)
+  target_compile_options(cudecomp_fort PRIVATE "$<$<COMPILE_LANGUAGE:Fortran>:-cpp;-cuda;-gpu=${CUF_GPU_ARG}>")  # CUDA Fortran flags for Fortran sources only
+  target_sources(
+    cudecomp_fort
+    PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/src/cudecomp_m.cuf
+  )
+  set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/src/cudecomp_m.cuf PROPERTIES LANGUAGE Fortran)
+
+  target_link_libraries(cudecomp_fort PUBLIC MPI::MPI_Fortran)
+
+  # Test for MPI_Comm_f2c/c2f: try to build a small program that calls MPI_Comm_f2c through a C binding
+  try_compile(
+    TEST_F2C_RESULT
+    ${CMAKE_BINARY_DIR}
+    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test_mpi_f2c.f90
+    LINK_LIBRARIES MPI::MPI_Fortran
+  )
+  if (NOT TEST_F2C_RESULT)
+    message(STATUS "Could not link MPI_Comm_f2c in Fortran module. Setting -DMPICH flag during module compilation.")
+    target_compile_definitions(cudecomp_fort PRIVATE MPICH)
+  endif()
+
+  install(
+    TARGETS cudecomp_fort
+    LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
+  )
+  # Install cuDecomp module
+  install(FILES ${CMAKE_BINARY_DIR}/include/cudecomp.mod DESTINATION ${CMAKE_INSTALL_PREFIX}/include)
+endif()
+
+if (CUDECOMP_BUILD_EXTRAS)
+  add_subdirectory(benchmark)
+
+  add_subdirectory(tests/cc)
+  add_subdirectory(examples/cc/basic_usage)
+  add_subdirectory(examples/cc/taylor_green)
+
+  if (CUDECOMP_BUILD_FORTRAN)
+    add_subdirectory(tests/fortran)
+    add_subdirectory(examples/fortran/basic_usage)
+    add_subdirectory(examples/fortran/poisson)
+  endif()
+endif()
+
diff --git a/README.md b/README.md index 15bb8cf..d9585a1 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,9 @@ Please contact us or open a GitHub issue if you are interested in using this lib ## Build +### Method 1: Makefile with Configuration file To build the library, you must first create a configuration file to point the installed to dependent library paths and enable/disable
features. - See the default [`nvhpcsdk.conf`](configs/nvhpcsdk.conf) for an example of settings to build the library using the [NVHPC SDK compilers and libraries](https://developer.nvidia.com/hpc-sdk). +See the default [`nvhpcsdk.conf`](configs/nvhpcsdk.conf) for an example of settings to build the library using the [NVHPC SDK compilers and libraries](https://developer.nvidia.com/hpc-sdk). The [`configs/`](configs) directory also contains several sample build configuration files for a number of GPU compute clusters, like Perlmutter, Summit, and Marconi 100. With this configuration file created, you can build the library using the command @@ -26,6 +27,21 @@ $ make -j CONFIGFILE= The library will be compiled and installed in a newly created `build/` directory. +### Method 2: CMake +The library can also be built with CMake. A CMake build of the library without additional examples/tests can be completed using the following commands: +```shell +$ mkdir build +$ cd build +$ cmake .. +$ make -j +``` +Several build variables are available to configure the CMake build; they are listed at the top of the project [`CMakeLists.txt`](CMakeLists.txt) file. As an example, +to configure the build to compile additional examples and enable NVSHMEM backends, you can run the following CMake command: +```shell +$ cmake -DCUDECOMP_BUILD_EXTRAS=1 -DCUDECOMP_ENABLE_NVSHMEM=1 .. +``` + + ### Dependencies We strongly recommend building this library using NVHPC SDK compilers and libraries, as the SDK contains all required dependencies for this library and is the focus of our testing. Fortran features are only supported using NVHPC SDK compilers.
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
new file mode 100644
index 0000000..d821144
--- /dev/null
+++ b/benchmark/CMakeLists.txt
@@ -0,0 +1,38 @@
+set(benchmark_targets  # four executables built from the same benchmark.cu with different definitions
+  benchmark_r2c
+  benchmark_c2c
+  benchmark_r2c_f
+  benchmark_c2c_f
+)
+
+foreach(tgt ${benchmark_targets})
+  add_executable(${tgt})
+  if (CMAKE_VERSION VERSION_LESS 3.18)
+    target_compile_options(${tgt} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_CC_FLAGS}>")  # -gencode flags set by the parent CMakeLists.txt
+  else()
+    set_target_properties(${tgt} PROPERTIES CUDA_ARCHITECTURES "${CUDECOMP_CUDA_CC_LIST}")
+  endif()
+  target_sources(${tgt}
+    PRIVATE
+    benchmark.cu
+  )
+  target_include_directories(${tgt}
+    PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/../include
+    ${NVHPC_CUFFT_INCLUDE_DIR}
+    ${NCCL_INCLUDE_DIR}
+    ${MPI_CXX_INCLUDE_DIRS})
+  target_link_libraries(${tgt} PRIVATE cudecomp)
+  target_link_libraries(${tgt} PRIVATE NVHPC::CUFFT)
+  set_target_properties(${tgt} PROPERTIES LINKER_LANGUAGE CXX)
+endforeach()
+
+target_compile_definitions(benchmark_r2c PRIVATE R2C)
+target_compile_definitions(benchmark_c2c PRIVATE C2C)
+target_compile_definitions(benchmark_r2c_f PRIVATE R2C USE_FLOAT)
+target_compile_definitions(benchmark_c2c_f PRIVATE C2C USE_FLOAT)  # C2C, not R2C: otherwise this target duplicates benchmark_r2c_f
+
+install(
+  TARGETS ${benchmark_targets}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/benchmark
+)
diff --git a/cmake/test_mpi_f2c.f90 b/cmake/test_mpi_f2c.f90 new file mode 100644 index 0000000..997fe60 --- /dev/null +++ b/cmake/test_mpi_f2c.f90 @@ -0,0 +1,61 @@ +! SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +! SPDX-License-Identifier: BSD-3-Clause +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions are met: +! +! 1. Redistributions of source code must retain the above copyright notice, this +! list of conditions and the following disclaimer. +! +! 2. Redistributions in binary form must reproduce the above copyright notice, +!
this list of conditions and the following disclaimer in the documentation +! and/or other materials provided with the distribution. +! +! 3. Neither the name of the copyright holder nor the names of its +! contributors may be used to endorse or promote products derived from +! this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+module test_f2c  ! hand-written interface to the C symbol MPI_Comm_f2c
+  use iso_c_binding
+  implicit none
+
+  type, bind(c) :: MPI_C_Comm
+    integer(c_int64_t) :: comm
+  end type MPI_C_Comm
+
+  type, bind(c) :: MPI_F_Comm
+    integer(c_int) :: comm
+  end type MPI_F_Comm
+
+  interface
+    function MPI_Comm_f2c(fcomm) bind(C,name='MPI_Comm_f2c') result(res)
+      import
+      type(MPI_F_Comm), value :: fcomm
+      type(MPI_C_Comm) :: res
+    end function MPI_Comm_f2c
+  end interface
+end module
+
+program main  ! link probe: the top-level CMakeLists.txt passes this file to try_compile()
+  use mpi
+  use test_f2c
+  implicit none
+
+  type(MPI_F_Comm) :: fcomm
+  type(MPI_C_Comm) :: ccomm
+
+  fcomm%comm = MPI_COMM_WORLD
+
+  ccomm = MPI_Comm_f2c(fcomm)
+end program
diff --git a/examples/cc/basic_usage/CMakeLists.txt b/examples/cc/basic_usage/CMakeLists.txt
new file mode 100644
index 0000000..2a825ca
--- /dev/null
+++ b/examples/cc/basic_usage/CMakeLists.txt
@@ -0,0 +1,39 @@
+set(basic_usage_targets_cc  # target names carry a _cc suffix; it is stripped from the binary name below
+  basic_usage_cc
+  basic_usage_autotune_cc
+)
+
+add_executable(basic_usage_cc)
+target_sources(basic_usage_cc
+  PRIVATE
+  basic_usage.cu
+)
+
+add_executable(basic_usage_autotune_cc)
+target_sources(basic_usage_autotune_cc
+  PRIVATE
+  basic_usage_autotune.cu
+)
+
+foreach(tgt ${basic_usage_targets_cc})
+  string(REPLACE "_cc" "" bin_name ${tgt})
+  set_target_properties(${tgt} PROPERTIES OUTPUT_NAME ${bin_name})
+  if (CMAKE_VERSION VERSION_LESS 3.18)
+    target_compile_options(${tgt} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_CC_FLAGS}>")  # -gencode flags set by the top-level CMakeLists.txt
+  else()
+    set_target_properties(${tgt} PROPERTIES CUDA_ARCHITECTURES "${CUDECOMP_CUDA_CC_LIST}")
+  endif()
+  target_include_directories(${tgt}
+    PRIVATE
+    ${CMAKE_BINARY_DIR}/include
+    ${MPI_CXX_INCLUDE_DIRS}
+  )
+  target_link_libraries(${tgt} PRIVATE MPI::MPI_CXX)
+  target_link_libraries(${tgt} PRIVATE cudecomp)
+  set_target_properties(${tgt} PROPERTIES LINKER_LANGUAGE CXX)
+endforeach()
+
+install(
+  TARGETS ${basic_usage_targets_cc}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/examples/cc/basic_usage
+)
diff --git a/examples/cc/taylor_green/CMakeLists.txt b/examples/cc/taylor_green/CMakeLists.txt new
file mode 100644
index 0000000..0580c28
--- /dev/null
+++ b/examples/cc/taylor_green/CMakeLists.txt
@@ -0,0 +1,28 @@
+add_executable(tg)
+
+target_sources(tg
+  PRIVATE
+  tg.cu
+)
+
+if (CMAKE_VERSION VERSION_LESS 3.18)
+  target_compile_options(tg PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_CC_FLAGS}>")  # -gencode flags set by the top-level CMakeLists.txt
+else()
+  set_target_properties(tg PROPERTIES CUDA_ARCHITECTURES "${CUDECOMP_CUDA_CC_LIST}")
+endif()
+
+target_include_directories(tg
+  PRIVATE
+  ${CMAKE_BINARY_DIR}/include
+  ${MPI_CXX_INCLUDE_DIRS}
+  ${NVHPC_CUFFT_INCLUDE_DIR}
+)
+target_link_libraries(tg PRIVATE MPI::MPI_CXX)
+target_link_libraries(tg PRIVATE NVHPC::CUFFT)
+target_link_libraries(tg PRIVATE cudecomp)
+set_target_properties(tg PROPERTIES LINKER_LANGUAGE CXX)
+
+install(
+  TARGETS tg
+  RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/examples/cc/taylor_green
+)
diff --git a/examples/fortran/basic_usage/CMakeLists.txt b/examples/fortran/basic_usage/CMakeLists.txt
new file mode 100644
index 0000000..78a5bb4
--- /dev/null
+++ b/examples/fortran/basic_usage/CMakeLists.txt
@@ -0,0 +1,36 @@
+set(basic_usage_targets_f  # target names carry a _f suffix; it is stripped from the binary name below
+  basic_usage_f
+  basic_usage_autotune_f
+)
+
+add_executable(basic_usage_f)
+target_sources(basic_usage_f
+  PRIVATE
+  basic_usage.f90
+)
+
+add_executable(basic_usage_autotune_f)
+target_sources(basic_usage_autotune_f
+  PRIVATE
+  basic_usage_autotune.f90
+)
+
+foreach(tgt ${basic_usage_targets_f})
+  string(REPLACE "_f" "" bin_name ${tgt})
+  set_target_properties(${tgt} PROPERTIES OUTPUT_NAME ${bin_name})
+  target_include_directories(${tgt}
+    PRIVATE
+    ${CMAKE_BINARY_DIR}/include
+    ${MPI_Fortran_INCLUDE_DIRS}
+  )
+  target_link_libraries(${tgt} PRIVATE MPI::MPI_Fortran)
+  target_link_libraries(${tgt} PRIVATE cudecomp)
+  target_link_libraries(${tgt} PRIVATE cudecomp_fort)
+  target_compile_options(${tgt} PRIVATE "$<$<COMPILE_LANGUAGE:Fortran>:-cpp;-cuda;-gpu=${CUF_GPU_ARG}>")  # CUF_GPU_ARG is set by the top-level CMakeLists.txt
+  target_link_options(${tgt} PRIVATE -cpp -cuda -gpu=${CUF_GPU_ARG})  # Fortran-only target, so no language guard ($<LINK_LANGUAGE> needs CMake >= 3.18)
+endforeach()
+
+install(
+  TARGETS ${basic_usage_targets_f}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/examples/fortran/basic_usage
+)
diff --git a/examples/fortran/poisson/CMakeLists.txt b/examples/fortran/poisson/CMakeLists.txt
new file mode 100644
index 0000000..2ec91d1
--- /dev/null
+++ b/examples/fortran/poisson/CMakeLists.txt
@@ -0,0 +1,23 @@
+add_executable(poisson)
+target_sources(poisson
+  PRIVATE
+  poisson.f90
+)
+
+target_include_directories(poisson
+  PRIVATE
+  ${CMAKE_BINARY_DIR}/include
+  ${MPI_Fortran_INCLUDE_DIRS}
+  ${NVHPC_CUFFT_INCLUDE_DIR}
+)
+target_link_libraries(poisson PRIVATE MPI::MPI_Fortran)
+target_link_libraries(poisson PRIVATE NVHPC::CUFFT)
+target_link_libraries(poisson PRIVATE cudecomp)
+target_link_libraries(poisson PRIVATE cudecomp_fort)
+target_compile_options(poisson PRIVATE "$<$<COMPILE_LANGUAGE:Fortran>:-cpp;-cuda;-gpu=${CUF_GPU_ARG}>")  # CUF_GPU_ARG is set by the top-level CMakeLists.txt
+target_link_options(poisson PRIVATE -cpp -cuda -gpu=${CUF_GPU_ARG})  # Fortran-only target, so no language guard ($<LINK_LANGUAGE> needs CMake >= 3.18)
+
+install(
+  TARGETS poisson
+  RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/examples/fortran/poisson
+)
diff --git a/tests/cc/CMakeLists.txt b/tests/cc/CMakeLists.txt
new file mode 100644
index 0000000..a3ec659
--- /dev/null
+++ b/tests/cc/CMakeLists.txt
@@ -0,0 +1,60 @@
+set(transpose_test_targets_cc  # one executable per data type, all built from transpose_test.cc
+  transpose_test_R32_cc
+  transpose_test_R64_cc
+  transpose_test_C32_cc
+  transpose_test_C64_cc
+)
+
+set(halo_test_targets_cc  # one executable per data type, all built from halo_test.cc
+  halo_test_R32_cc
+  halo_test_R64_cc
+  halo_test_C32_cc
+  halo_test_C64_cc
+)
+
+foreach(tgt ${transpose_test_targets_cc})
+  add_executable(${tgt})
+  target_sources(${tgt}
+    PRIVATE
+    transpose_test.cc
+  )
+endforeach()
+
+foreach(tgt ${halo_test_targets_cc})
+  add_executable(${tgt})
+  target_sources(${tgt}
+    PRIVATE
+    halo_test.cc
+  )
+endforeach()
+
+foreach(tgt ${transpose_test_targets_cc} ${halo_test_targets_cc})
+  string(REPLACE "_cc" "" bin_name ${tgt})
+  set_target_properties(${tgt} PROPERTIES OUTPUT_NAME ${bin_name})
+  target_include_directories(${tgt}
+    PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../include
+    ${MPI_CXX_INCLUDE_DIRS}
+    ${NCCL_INCLUDE_DIR}
+    ${NVHPC_CUFFT_INCLUDE_DIR}
+    ${NVHPC_CUDA_INCLUDE_DIR}
+  )
+  target_link_libraries(${tgt} PRIVATE MPI::MPI_CXX)
+  target_link_libraries(${tgt} PRIVATE NVHPC::CUDART)
+  target_link_libraries(${tgt} PRIVATE cudecomp)  # PRIVATE: an executable has no consumers to propagate to
+endforeach()
+
+target_compile_definitions(transpose_test_R32_cc PRIVATE R32)
+target_compile_definitions(transpose_test_R64_cc PRIVATE R64)
+target_compile_definitions(transpose_test_C32_cc PRIVATE C32)
+target_compile_definitions(transpose_test_C64_cc PRIVATE C64)
+
+target_compile_definitions(halo_test_R32_cc PRIVATE R32)
+target_compile_definitions(halo_test_R64_cc PRIVATE R64)
+target_compile_definitions(halo_test_C32_cc PRIVATE C32)
+target_compile_definitions(halo_test_C64_cc PRIVATE C64)
+
+install(
+  TARGETS ${transpose_test_targets_cc} ${halo_test_targets_cc}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/tests/cc
+)
diff --git a/tests/fortran/CMakeLists.txt b/tests/fortran/CMakeLists.txt
new file mode 100644
index 0000000..ef820ef
--- /dev/null
+++ b/tests/fortran/CMakeLists.txt
@@ -0,0 +1,59 @@
+set(transpose_test_targets_f  # one executable per data type, all built from transpose_test.f90
+  transpose_test_R32_f
+  transpose_test_R64_f
+  transpose_test_C32_f
+  transpose_test_C64_f
+)
+
+set(halo_test_targets_f  # one executable per data type, all built from halo_test.f90
+  halo_test_R32_f
+  halo_test_R64_f
+  halo_test_C32_f
+  halo_test_C64_f
+)
+
+foreach(tgt ${transpose_test_targets_f})
+  add_executable(${tgt})
+  target_sources(${tgt}
+    PRIVATE
+    transpose_test.f90
+  )
+endforeach()
+
+foreach(tgt ${halo_test_targets_f})
+  add_executable(${tgt})
+  target_sources(${tgt}
+    PRIVATE
+    halo_test.f90
+  )
+endforeach()
+
+foreach(tgt ${transpose_test_targets_f} ${halo_test_targets_f})
+  string(REPLACE "_f" "" bin_name ${tgt})
+  set_target_properties(${tgt} PROPERTIES OUTPUT_NAME ${bin_name})
+  target_include_directories(${tgt}
+    PRIVATE
+    ${CMAKE_BINARY_DIR}/include
+    ${MPI_Fortran_INCLUDE_DIRS}
+  )
+  target_link_libraries(${tgt} PRIVATE MPI::MPI_Fortran)
+  target_link_libraries(${tgt} PRIVATE cudecomp)  # PRIVATE: an executable has no consumers to propagate to
+  target_link_libraries(${tgt} PRIVATE cudecomp_fort)
+  target_compile_options(${tgt} PRIVATE "$<$<COMPILE_LANGUAGE:Fortran>:-cpp;-cuda;-gpu=${CUF_GPU_ARG}>")  # CUF_GPU_ARG is set by the top-level CMakeLists.txt
+  target_link_options(${tgt} PRIVATE -cpp -cuda -gpu=${CUF_GPU_ARG})  # Fortran-only target, so no language guard ($<LINK_LANGUAGE> needs CMake >= 3.18)
+endforeach()
+
+target_compile_definitions(transpose_test_R32_f PRIVATE R32)
+target_compile_definitions(transpose_test_R64_f PRIVATE R64)
+target_compile_definitions(transpose_test_C32_f PRIVATE C32)
+target_compile_definitions(transpose_test_C64_f PRIVATE C64)
+
+target_compile_definitions(halo_test_R32_f PRIVATE R32)
+target_compile_definitions(halo_test_R64_f PRIVATE R64)
+target_compile_definitions(halo_test_C32_f PRIVATE C32)
+target_compile_definitions(halo_test_C64_f PRIVATE C64)
+
+install(
+  TARGETS ${transpose_test_targets_f} ${halo_test_targets_f}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/tests/fortran
+)
diff --git a/tests/fortran/halo_test.f90 b/tests/fortran/halo_test.f90 index fc6d0a4..3f2f63e 100644 --- a/tests/fortran/halo_test.f90 +++ b/tests/fortran/halo_test.f90 @@ -26,28 +26,33 @@ ! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#if defined(R32) +#define CHECK_CUDECOMP_EXIT(f) if (f /= CUDECOMP_RESULT_SUCCESS) call exit(1) +#define CHECK_CUDA_EXIT(f) if (f /= cudaSuccess) call exit(1) + +#ifdef R32 #define ARRTYPE real(real32) #define DTYPE CUDECOMP_FLOAT -#define MODNAME halo_CUDECOMP_FLOAT_mod -#elif defined(R64) +module halo_CUDECOMP_FLOAT_mod +#endif + +#ifdef R64 #define ARRTYPE real(real64) #define DTYPE CUDECOMP_DOUBLE -#define MODNAME halo_CUDECOMP_DOUBLE_mod -#elif defined(C32) +module halo_CUDECOMP_DOUBLE_mod +#endif + +#ifdef C32 #define ARRTYPE complex(real32) #define DTYPE CUDECOMP_FLOAT_COMPLEX -#define MODNAME halo_CUDECOMP_FLOAT_COMPLEX_mod -#elif defined(C64) +module halo_CUDECOMP_FLOAT_COMPLEX_mod +#endif + +#ifdef C64 #define ARRTYPE complex(real64) #define DTYPE CUDECOMP_DOUBLE_COMPLEX -#define MODNAME halo_CUDECOMP_DOUBLE_COMPLEX_mod +module halo_CUDECOMP_DOUBLE_COMPLEX_mod #endif -#define CHECK_CUDECOMP_EXIT(f) if (f /= CUDECOMP_RESULT_SUCCESS) call exit(1) -#define CHECK_CUDA_EXIT(f) if (f /= cudaSuccess) call exit(1) - -module MODNAME use, intrinsic :: iso_fortran_env, only: real32, real64 contains function compare_pencils(ref, res, pinfo) result(mismatch) @@ -160,7 +165,7 @@ subroutine flat_copy(src, dst, count) dst(1:count) = src(1:count) end subroutine flat_copy -end module MODNAME +end module program main use cudafor @@ -168,7 +173,21 @@ program main use cudecomp use, intrinsic :: iso_fortran_env, only: real32, real64 - use MODNAME +#ifdef R32 + use halo_CUDECOMP_FLOAT_mod +#endif + +#ifdef R64 + use halo_CUDECOMP_DOUBLE_mod +#endif + +#ifdef C32 + use halo_CUDECOMP_FLOAT_COMPLEX_mod +#endif + +#ifdef C64 + use halo_CUDECOMP_DOUBLE_COMPLEX_mod +#endif implicit none diff --git a/tests/fortran/transpose_test.f90 b/tests/fortran/transpose_test.f90 index 6f1f709..4e42a6a 100644 --- a/tests/fortran/transpose_test.f90 +++ b/tests/fortran/transpose_test.f90 @@ -26,28 +26,33 @@ ! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! 
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#if defined(R32) +#define CHECK_CUDECOMP_EXIT(f) if (f /= CUDECOMP_RESULT_SUCCESS) call exit(1) +#define CHECK_CUDA_EXIT(f) if (f /= cudaSuccess) call exit(1) + +#ifdef R32 #define ARRTYPE real(real32) #define DTYPE CUDECOMP_FLOAT -#define MODNAME transpose_CUDECOMP_FLOAT_mod -#elif defined(R64) +module transpose_CUDECOMP_FLOAT_mod +#endif + +#ifdef R64 #define ARRTYPE real(real64) #define DTYPE CUDECOMP_DOUBLE -#define MODNAME transpose_CUDECOMP_DOUBLE_mod -#elif defined(C32) +module transpose_CUDECOMP_DOUBLE_mod +#endif + +#ifdef C32 #define ARRTYPE complex(real32) #define DTYPE CUDECOMP_FLOAT_COMPLEX -#define MODNAME transpose_CUDECOMP_FLOAT_COMPLEX_mod -#elif defined(C64) +module transpose_CUDECOMP_FLOAT_COMPLEX_mod +#endif + +#ifdef C64 #define ARRTYPE complex(real64) #define DTYPE CUDECOMP_DOUBLE_COMPLEX -#define MODNAME transpose_CUDECOMP_DOUBLE_COMPLEX_mod +module transpose_CUDECOMP_DOUBLE_COMPLEX_mod #endif -#define CHECK_CUDECOMP_EXIT(f) if (f /= CUDECOMP_RESULT_SUCCESS) call exit(1) -#define CHECK_CUDA_EXIT(f) if (f /= cudaSuccess) call exit(1) - -module MODNAME use, intrinsic :: iso_fortran_env, only: real32, real64 contains function compare_pencils(ref, res, pinfo) result(mismatch) @@ -109,7 +114,7 @@ subroutine flat_copy(src, dst, count) dst(1:count) = src(1:count) end subroutine flat_copy -end module MODNAME +end module program main use cudafor @@ -117,7 +122,21 @@ program main use cudecomp use, intrinsic :: iso_fortran_env, only: real32, real64 - use MODNAME +#ifdef R32 + use transpose_CUDECOMP_FLOAT_mod +#endif + +#ifdef R64 + use transpose_CUDECOMP_DOUBLE_mod +#endif + +#ifdef C32 + use transpose_CUDECOMP_FLOAT_COMPLEX_mod +#endif + +#ifdef C64 + use transpose_CUDECOMP_DOUBLE_COMPLEX_mod +#endif implicit none