Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Basic kokkos #206

Merged
merged 29 commits into from
Jun 22, 2022
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
060bfc4
Kokkos: Add git submodule and CMake changes
ajpowelsnl Jan 12, 2022
8da0e88
Kokkos: Initialize/Finalize Kokkos in Driver
ajpowelsnl Jan 12, 2022
2da3277
Kokkos: Add basic kernel group variants
ajpowelsnl Jan 12, 2022
a8b4fc5
Clean up basic kernels and CMakeLists.txt
ajpowelsnl Jan 13, 2022
42f1181
Addressing PR comments before merge with RPS develop
ajpowelsnl Feb 2, 2022
a9ed2f7
Add DAXPY_ATOMIC, formatting, in REDUCE3, Kokkos::min, Kokkos::max
ajpowelsnl Feb 7, 2022
2ba3c9d
DAXPY_ATOMIC: copying files from llnl/develop to restore original fil…
ajpowelsnl Feb 9, 2022
f6b8232
Delete code now in KokkosViewUtils.hpp
Feb 8, 2022
800bd44
RAJAPerfSuite.hpp 0d8b139: removing RUN_KOKKOS infrastructure in unfo…
Feb 8, 2022
ff93ba6
RAJAPerfSuite.hpp: rm line 18 to minimize changes
Feb 8, 2022
73dc173
reverting formatting changes
ajpowelsnl Feb 9, 2022
c843d99
Restore destructor in DAXPY_ATOMIC.cpp
ajpowelsnl May 21, 2022
422f8d0
Kokkos updates for running with the current RAJA + Kokkos
ajpowelsnl May 25, 2022
a0f5c31
Addressing D. Beckingsale PR comments
ajpowelsnl Jun 1, 2022
593489b
Build and runtime fix ups with Jason B.
ajpowelsnl Jun 17, 2022
d042c8a
MULADDSUB-Kokkos.cpp: add guard to fix failing CI
ajpowelsnl Jun 17, 2022
eb3fa28
Update src/RAJAPerfSuiteDriver.cpp
ajpowelsnl Jun 21, 2022
0ee2fce
Addressing Jason B. PR comments
ajpowelsnl Jun 21, 2022
4b529c4
Merge branch 'basic-kokkos' of github.com:ajpowelsnl/RAJAPerf into ba…
ajpowelsnl Jun 21, 2022
a9bd651
Addressing Jason B. PR requests: re-adding two includes + rm Kokkos_…
ajpowelsnl Jun 21, 2022
f8eef46
RAJAPerfSuite.hpp: addressing final-ish PR comments from Jason B.
ajpowelsnl Jun 21, 2022
0407c36
Update src/common/RAJAPerfSuite.hpp
ajpowelsnl Jun 22, 2022
d57a5d9
Update src/common/RAJAPerfSuite.hpp
ajpowelsnl Jun 22, 2022
3e49f7d
Update src/common/RAJAPerfSuite.hpp
ajpowelsnl Jun 22, 2022
c28aff3
Update src/common/RAJAPerfSuite.hpp
ajpowelsnl Jun 22, 2022
8ed1554
Remove extra whitespace
MrBurmark Jun 22, 2022
dbbae25
Add back in newline
MrBurmark Jun 22, 2022
9c2c431
Remove whitespace change
MrBurmark Jun 22, 2022
6add081
Remove whitespace change
MrBurmark Jun 22, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
[submodule "tpl/RAJA"]
path = tpl/RAJA
url = https://github.com/LLNL/RAJA.git
[submodule "tpl/kokkos"]
path = tpl/kokkos
url = https://github.com/kokkos/kokkos
63 changes: 56 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ cmake_minimum_required(VERSION 3.14.5)

option(ENABLE_RAJA_SEQUENTIAL "Run sequential variants of RAJA kernels. Disable
this, and all other variants, to run _only_ raw C loops." On)
option(ENABLE_KOKKOS "Include Kokkos implementations of the kernels in the RAJA Perfsuite" Off)

#
# Note: the BLT build system is inherited by RAJA and is initialized by RAJA
Expand All @@ -22,8 +23,13 @@ if (PERFSUITE_ENABLE_WARNINGS)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror")
endif()

set(CMAKE_CXX_STANDARD 14)
set(BLT_CXX_STD c++14)
if(ENABLE_KOKKOS)
set(CMAKE_CXX_STANDARD 17)
set(BLT_CXX_STD c++17)
else()
set(CMAKE_CXX_STANDARD 14)
set(BLT_CXX_STD c++14)
endif()

include(blt/SetupBLT.cmake)

Expand Down Expand Up @@ -100,7 +106,12 @@ endif()
if (ENABLE_CUDA)
list(APPEND RAJA_PERFSUITE_DEPENDS cuda)
endif()
if (ENABLE_HIP)

# Kokkos requires hipcc as the CMAKE_CXX_COMPILER for HIP AMD/VEGA GPU
# platforms, whereas RAJAPerf Suite uses blt/CMake FindHIP to set HIP compiler.
# Separate RAJAPerf Suite and Kokkos handling of HIP compilers

if ((ENABLE_HIP) AND (NOT ENABLE_KOKKOS))
message(STATUS "HIP version: ${hip_VERSION}")
if("${hip_VERSION}" VERSION_LESS "3.5")
message(FATAL_ERROR "Trying to use HIP/ROCm version ${hip_VERSION}. RAJA Perf Suite requires HIP/ROCm version 3.5 or newer. ")
Expand All @@ -113,8 +124,13 @@ set(RAJAPERF_BUILD_SYSTYPE $ENV{SYS_TYPE})
set(RAJAPERF_BUILD_HOST $ENV{HOSTNAME})

if (ENABLE_CUDA)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr")
if (ENABLE_KOKKOS)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict --extended-lambda --expt-relaxed-constexpr")
else()
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr")
endif()

set(RAJAPERF_COMPILER "${CUDA_NVCC_EXECUTABLE}")
list(APPEND RAJAPERF_COMPILER ${CMAKE_CXX_COMPILER})
Expand All @@ -135,13 +151,46 @@ configure_file(${CMAKE_SOURCE_DIR}/src/rajaperf_config.hpp.in

include_directories($<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>)

# Make sure RAJA flag propagate (we need to do some house cleaning to
# Make sure RAJA flags propagate (we need to do some tidying to
# remove project-specific CMake variables that are no longer needed)
set (CUDA_NVCC_FLAGS ${RAJA_NVCC_FLAGS})

#
# Each directory in the perf suite has its own CMakeLists.txt file.
#

# ENABLE_KOKKOS is a RAJAPerf Suite option
if(ENABLE_KOKKOS)
add_definitions(-DRUN_KOKKOS)
if(ENABLE_HIP)
set(Kokkos_ENABLE_HIP ON CACHE BOOL "Kokkos builds for AMD HIP set the
Kokkos_ENABLE_HIP variable to ON")
endif()

if(ENABLE_TARGET_OPENMP)
set(Kokkos_ENABLE_OPENMPTARGET ON CACHE BOOL "Docstring")
if(NOT CMAKE_BUILD_TYPE MATCHES Debug)
if(NOT EXPERIMENTAL_BUILD)
message(FATAL_ERROR "Kokkos builds with OpenMPTarget require a Debug build to succeed at the moment. Rebuild with CMAKE_BUILD_TYPE=Debug. If you're a compiler developer, rebuild with -DEXPERIMENTAL_BUILD=ON")
endif()
endif()
endif()

# ENABLE_CUDA is a RAJAPerf Suite option
if(ENABLE_CUDA)
set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Docstring")
set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Docstring")
enable_language(CUDA)
endif()
if(ENABLE_OPENMP)
set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Docstring")
endif()

add_subdirectory(tpl/kokkos)
get_property(KOKKOS_INCLUDE_DIRS DIRECTORY tpl/kokkos PROPERTY INCLUDE_DIRECTORIES)
include_directories(${KOKKOS_INCLUDE_DIRS})
list(APPEND RAJA_PERFSUITE_DEPENDS kokkos)
endif()

add_subdirectory(src)

if (RAJA_PERFSUITE_ENABLE_TESTS)
Expand Down
5 changes: 4 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
include_directories(.)

add_subdirectory(common)
add_subdirectory(apps)
add_subdirectory(basic)
add_subdirectory(basic-kokkos)
add_subdirectory(apps)
add_subdirectory(lcals)
add_subdirectory(polybench)
add_subdirectory(stream)
Expand All @@ -20,10 +21,12 @@ set(RAJA_PERFSUITE_EXECUTABLE_DEPENDS
common
apps
basic
basic-kokkos
lcals
polybench
stream
algorithm)

MrBurmark marked this conversation as resolved.
Show resolved Hide resolved
list(APPEND RAJA_PERFSUITE_EXECUTABLE_DEPENDS ${RAJA_PERFSUITE_DEPENDS})

if(ENABLE_TARGET_OPENMP)
Expand Down
10 changes: 10 additions & 0 deletions src/RAJAPerfSuiteDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#ifdef RUN_KOKKOS
#include <Kokkos_Core.hpp>
#endif

#include "common/Executor.hpp"

#include <iostream>
Expand All @@ -24,6 +28,9 @@ int main( int argc, char** argv )
MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
rajaperf::getCout() << "\n\nRunning with " << num_ranks << " MPI ranks..." << std::endl;
#endif
#ifdef RUN_KOKKOS
Kokkos::initialize(argc, argv);
#endif

// STEP 1: Create suite executor object
rajaperf::Executor executor(argc, argv);
Expand All @@ -43,6 +50,9 @@ int main( int argc, char** argv )

rajaperf::getCout() << "\n\nDONE!!!...." << std::endl;

#ifdef RUN_KOKKOS
Kokkos::finalize();
#endif
#ifdef RAJA_PERFSUITE_ENABLE_MPI
MPI_Finalize();
#endif
Expand Down
27 changes: 27 additions & 0 deletions src/basic-kokkos/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
###############################################################################
# Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
# and RAJA Performance Suite project contributors.
# See the RAJAPerf/COPYRIGHT file for details.
#
# SPDX-License-Identifier: (BSD-3-Clause)
###############################################################################

#include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../basic)

# Build the Kokkos variants of the "basic" kernel group as their own library,
# named basic-kokkos.
#
# INCLUDES adds ../basic to the include path. The sources listed here include
# kernel headers by bare name (e.g. DAXPY-Kokkos.cpp includes "DAXPY.hpp"),
# presumably resolved from that directory -- TODO confirm.
#
# DEPENDS_ON pulls in the suite's common library plus whatever the top-level
# CMakeLists.txt has accumulated in RAJA_PERFSUITE_DEPENDS.
blt_add_library(
NAME basic-kokkos
SOURCES
PI_ATOMIC-Kokkos.cpp
DAXPY-Kokkos.cpp
IF_QUAD-Kokkos.cpp
INIT3-Kokkos.cpp
INIT_VIEW1D-Kokkos.cpp
INIT_VIEW1D_OFFSET-Kokkos.cpp
MULADDSUB-Kokkos.cpp
NESTED_INIT-Kokkos.cpp
REDUCE3_INT-Kokkos.cpp
TRAP_INT-Kokkos.cpp
DAXPY_ATOMIC-Kokkos.cpp
INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../basic
DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS}
)
69 changes: 69 additions & 0 deletions src/basic-kokkos/DAXPY-Kokkos.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
// and RAJA Performance Suite project contributors.
// See the RAJAPerf/COPYRIGHT file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include "DAXPY.hpp"
#if defined(RUN_KOKKOS)
#include "common/KokkosViewUtils.hpp"
#include <iostream>

namespace rajaperf {
namespace basic {

struct DaxpyFunctor {
Real_ptr x;
Real_ptr y;
Real_type a;
DaxpyFunctor(Real_ptr m_x, Real_ptr m_y, Real_type m_a)
: x(m_x), y(m_y), a(m_a) {}
void operator()(Index_type i) const { DAXPY_BODY; }
};

// Runs the Kokkos variant of the DAXPY kernel (y[i] += a * x[i]).
//
// vid       Variant to run; only Kokkos_Lambda is handled here. Any other
//           value prints a diagnostic and runs no kernel.
// tune_idx  Unused.
//
// The kernel is launched getRunReps() times over [0, getActualProblemSize())
// between startTimer() and stopTimer().
void DAXPY::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
  const Index_type run_reps = getRunReps();
  const Index_type ibegin = 0;
  const Index_type iend = getActualProblemSize();

  // Defined in DAXPY.hpp (not shown here); the code below relies on it to
  // bring x, y and a into scope.
  DAXPY_DATA_SETUP;

  // Wrap the kernel arrays in Kokkos Views of length iend.
  auto x_view = getViewFromPointer(x, iend);
  auto y_view = getViewFromPointer(y, iend);

  switch (vid) {

    case Kokkos_Lambda: {

      // Fence before starting and before stopping the timer so that
      // outstanding Kokkos work has completed at both timestamps.
      Kokkos::fence();
      startTimer();

      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
        Kokkos::parallel_for(
            "DAXPY-Kokkos Kokkos_Lambda",
            Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
            KOKKOS_LAMBDA(Index_type i) { y_view[i] += a * x_view[i]; });
      }

      Kokkos::fence();
      stopTimer();

      break;
    }
    default: {
      // Report through the suite's getCout() stream, as the driver and the
      // other Kokkos kernels do, rather than writing to std::cout directly.
      getCout() << "\n DAXPY : Unknown variant id = " << vid << std::endl;
    }
  }

  // Move data (i.e., pointer, KokkosView-wrapped pointer) back to the host
  // from the device. This runs for every variant id, including unknown ones.
  moveDataToHostFromKokkosView(x, x_view, iend);
  moveDataToHostFromKokkosView(y, y_view, iend);
}

} // end namespace basic
} // end namespace rajaperf
#endif
70 changes: 70 additions & 0 deletions src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
// and RAJA Performance Suite project contributors.
// See the RAJAPerf/LICENSE file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include "DAXPY_ATOMIC.hpp"
#if defined(RUN_KOKKOS)
#include "common/KokkosViewUtils.hpp"
#include <iostream>

// Delete me
// For de-bugging:
#include "RAJA/RAJA.hpp"

namespace rajaperf {
namespace basic {

// Runs the Kokkos variant of the DAXPY_ATOMIC kernel: each element of y is
// updated with Kokkos::atomic_add(&y[i], a * x[i]).
//
// vid       Variant to run; only Kokkos_Lambda is handled here. Any other
//           value prints a diagnostic and runs no kernel.
// tune_idx  Unused.
void DAXPY_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
  const Index_type run_reps = getRunReps();
  const Index_type ibegin = 0;
  const Index_type iend = getActualProblemSize();

  // Defined in DAXPY_ATOMIC.hpp (not shown here); the code below relies on it
  // to bring x, y and a into scope.
  DAXPY_ATOMIC_DATA_SETUP;

  // Kokkos Views wrapping the kernel arrays, each of length iend.
  auto x_wrapped = getViewFromPointer(x, iend);
  auto y_wrapped = getViewFromPointer(y, iend);

  if (vid == Kokkos_Lambda) {

    using range_policy = Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>;

    // Fence on entry and exit so that outstanding Kokkos work has completed
    // when the timer starts and when it stops.
    Kokkos::fence();
    startTimer();

    for (RepIndex_type rep = 0; rep < run_reps; ++rep) {
      Kokkos::parallel_for(
          "DAXPY_ATOMIC_Kokkos Kokkos_Lambda",
          range_policy(ibegin, iend),
          KOKKOS_LAMBDA(Index_type i) {
            Kokkos::atomic_add(&y_wrapped[i], a * x_wrapped[i]);
          });
    }

    Kokkos::fence();
    stopTimer();

  } else {
    getCout() << "\n DAXPY_ATOMIC : Unknown variant id = " << vid << std::endl;
  }

  // Hand the data back to the host pointers; this runs for every variant id.
  moveDataToHostFromKokkosView(x, x_wrapped, iend);
  moveDataToHostFromKokkosView(y, y_wrapped, iend);
}

} // end namespace basic
} // end namespace rajaperf
#endif
Loading