Commit

Merge pull request #591 from LLNL/v0.8.0-rc
V0.8.0 rc
rhornung67 committed Mar 28, 2019
2 parents caa33b3 + b305a3a commit 8d19a8c
Showing 168 changed files with 5,178 additions and 2,068 deletions.
6 changes: 6 additions & 0 deletions .travis.yml
@@ -69,6 +69,12 @@ matrix:
- IMG=nvcc9
- CMAKE_EXTRA_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DENABLE_CUDA=On -DENABLE_WARNINGS=On -DENABLE_WARNINGS_AS_ERRORS=On -DENABLE_TBB=On"
- DO_TEST=no
- compiler: clang-cuda
env:
- COMPILER=clang++
- IMG=clang-cuda
- CMAKE_EXTRA_FLAGS="-DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=Off -DENABLE_CLANG_CUDA=On -DBLT_CLANG_CUDA_ARCH=sm_60 -DENABLE_CUDA=On -DCUDA_ARCH=sm_60"
- DO_TEST=no

before_install: # don't try to build and run intel when it's impossible
- |
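
For local experimentation, the new clang-cuda CI entry could be approximated with a CMake cache (host-config) file along the following lines. This is a sketch only; the compiler name is an assumption and should match a local Clang installation with CUDA support:

    # Sketch of a host-config mirroring the clang-cuda CI entry above; adjust for your system.
    set(CMAKE_CXX_COMPILER "clang++" CACHE PATH "")   # assumed compiler name
    set(CMAKE_BUILD_TYPE Release CACHE STRING "")
    set(ENABLE_OPENMP Off CACHE BOOL "")
    set(ENABLE_CUDA On CACHE BOOL "")
    set(ENABLE_CLANG_CUDA On CACHE BOOL "")
    set(BLT_CLANG_CUDA_ARCH sm_60 CACHE STRING "")
    set(CUDA_ARCH sm_60 CACHE STRING "")
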
35 changes: 31 additions & 4 deletions CMakeLists.txt
@@ -17,7 +17,7 @@ cmake_policy(SET CMP0048 NEW)

# Set version number
set(RAJA_VERSION_MAJOR 0)
set(RAJA_VERSION_MINOR 7)
set(RAJA_VERSION_MINOR 8)
set(RAJA_VERSION_PATCHLEVEL 0)

if (RAJA_LOADED AND (NOT RAJA_LOADED STREQUAL "${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}"))
@@ -27,7 +27,7 @@ endif()
if (NOT RAJA_LOADED)
set (RAJA_LOADED "${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}")

# Promote RAJA_LOADED to PARENT_SCOPE if it exists, which is only if we are bringing
# Promote RAJA_LOADED to PARENT_SCOPE if it exists, which is only if we are bringing
# in RAJA as a subproject to a larger CMake project
get_directory_property(hasParent PARENT_DIRECTORY)
if(hasParent)
@@ -55,9 +55,11 @@ if (NOT RAJA_LOADED)
option(ENABLE_TARGET_OPENMP "Build OpenMP on target device support" Off)
option(ENABLE_CLANG_CUDA "Use Clang's native CUDA support" Off)
set(CUDA_ARCH "sm_35" CACHE STRING "Compute architecture to pass to CUDA builds")
option(ENABLE_EXTERNAL_CUB "Use an external cub for scans" Off)
option(ENABLE_TESTS "Build tests" On)
option(ENABLE_REPRODUCERS "Build issue reproducers" Off)
option(ENABLE_EXAMPLES "Build simple examples" On)
option(ENABLE_EXERCISES "Build exercises " On)
option(ENABLE_MODULES "Enable modules in supporting compilers (clang)" On)
option(ENABLE_WARNINGS "Enable warnings as errors for CI" Off)
option(ENABLE_DOCUMENTATION "Build RAJA documentation" Off)
@@ -128,6 +130,23 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel" AND CMAKE_CXX_COMPILER_VERSION V
cuda)
endif ()

if (ENABLE_CUDA)
if(ENABLE_EXTERNAL_CUB)
find_package(CUB)
if (CUB_FOUND)
blt_register_library(
NAME cub
INCLUDES ${CUB_INCLUDE_DIRS})
set(raja_depends
${raja_depends}
cub)
else()
message(WARNING "External CUB not found.")
set(ENABLE_EXTERNAL_CUB Off)
endif()
endif ()
endif ()

if (ENABLE_CHAI)
set (raja_depends
${raja_depends}
@@ -162,8 +181,12 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel" AND CMAKE_CXX_COMPILER_VERSION V
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/tpl/cub>
$<INSTALL_INTERFACE:include>)

install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN *.hpp)
install(DIRECTORY tpl/cub/ DESTINATION include FILES_MATCHING PATTERN *.cuh)
if(ENABLE_EXTERNAL_CUB)
install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN *.hpp)
else()
install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN *.hpp)
install(DIRECTORY tpl/cub/ DESTINATION include FILES_MATCHING PATTERN *.cuh)
endif()

install(FILES
${PROJECT_BINARY_DIR}/include/RAJA/config.hpp
@@ -183,6 +206,10 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel" AND CMAKE_CXX_COMPILER_VERSION V
add_subdirectory(examples)
endif()

if(ENABLE_EXERCISES)
add_subdirectory(exercises)
endif()

if (ENABLE_DOCUMENTATION)
add_subdirectory(docs)
endif ()
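
The new ENABLE_EXTERNAL_CUB option above lets a build use an external CUB checkout instead of the bundled tpl/cub copy. A minimal cache-file sketch (the CUB location is a placeholder path) might look like:

    # Sketch only: point RAJA at an external CUB instead of the bundled tpl/cub.
    set(ENABLE_CUDA On CACHE BOOL "")
    set(ENABLE_EXTERNAL_CUB On CACHE BOOL "")
    set(CUB_DIR "/path/to/cub" CACHE PATH "")   # placeholder path
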
17 changes: 10 additions & 7 deletions Dockerfile
@@ -1,13 +1,16 @@
FROM nvidia/cuda:8.0-devel-ubuntu16.04
#
#Builds and installs RAJA using the gcc8 compiler
#

FROM rajaorg/compiler:gcc8
MAINTAINER RAJA Development Team <raja-dev@llnl.gov>

RUN apt-get update -y
RUN apt-get install -y git cmake gdb
COPY --chown=raja:raja . /home/raja/workspace

RUN cd /opt/ && git clone https://github.com/LLNL/RAJA.git
WORKDIR /home/raja/workspace

WORKDIR /opt/RAJA
RUN mkdir build && cd build && cmake -DENABLE_CUDA=OFF ..

RUN mkdir build && cd build && cmake -DENABLE_CUDA=ON ..
RUN cd build && sudo make -j 3 && sudo make install

RUN cd build && make -j && make install
CMD ["bash"]
35 changes: 35 additions & 0 deletions RELEASE_NOTES.md
@@ -13,6 +13,41 @@
[comment]: # (For details about use and distribution, please read RAJA/LICENSE.)
[comment]: # (#################################################################)

RAJA v0.8.0 Release Notes
=========================

This release contains one major change and some minor improvements to
compilation and performance.

Major changes include:

* Build system updated to use the latest version of BLT (or close to it).
Depending on how one builds RAJA, this could require changes to how
information is passed to CMake. Content describing how this is done has
been added to the relevant sections of the RAJA User Guide.

Other notable changes include:

* Features (These are not yet documented and should be considered
experimental. Documentation and usage examples will appear in the
next RAJA release.)
* New thread, warp, and bitmask policies for CUDA.
* Added AtomicLocalArray type which returns data elements wrapped
in an AtomicRef object.
* Bug Fixes:
* Fixed issue in RangeStrideSegment iteration.
* Fixed 'align hint' macro to eliminate a compile warning when the XL compiler
is used with nvcc.
* Fixed issues where a CUDA architecture level (i.e., sm_*) set too low
generated compiler warnings/errors. Caveats about which RAJA features
(mostly atomic operations) are available at different CUDA architecture
levels were added to the User Guide.

* Performance Improvements:
* Some performance improvements in RAJA::kernel usage with the CUDA back-end.


RAJA v0.7.0 Release Notes
=========================

2 changes: 1 addition & 1 deletion blt
Submodule blt updated 131 files
33 changes: 7 additions & 26 deletions cmake/SetupCompilers.cmake
@@ -77,44 +77,25 @@ if ( MSVC )
endif()

if (ENABLE_CUDA)
if ( NOT DEFINED RAJA_NVCC_STD )
set(RAJA_NVCC_STD "c++11")
# When we require cmake 3.8+, replace this with setting CUDA_STANDARD
if(CUDA_VERSION_MAJOR GREATER "8")
execute_process(COMMAND ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc -std c++14 -ccbin ${CMAKE_CXX_COMPILER} .
ERROR_VARIABLE TEST_NVCC_ERR
OUTPUT_QUIET)
if (NOT TEST_NVCC_ERR MATCHES "flag is not supported with the configured host compiler")
set(RAJA_NVCC_STD "c++14")
endif()
else()
endif()
endif()
set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda")

if (NOT RAJA_HOST_CONFIG_LOADED)
list(APPEND RAJA_EXTRA_NVCC_FLAGS -restrict; -arch ${CUDA_ARCH}; -std ${RAJA_NVCC_STD}; --expt-extended-lambda; -ccbin; ${CMAKE_CXX_COMPILER})

set(RAJA_NVCC_FLAGS_RELEASE -O2 CACHE STRING "")
set(RAJA_NVCC_FLAGS_DEBUG -g; -G; -O0 CACHE STRING "")
set(RAJA_NVCC_FLAGS_MINSIZEREL -Os CACHE STRING "")
set(RAJA_NVCC_FLAGS_RELWITHDEBINFO -g; -lineinfo; -O2 CACHE STRING "")
set(CMAKE_CUDA_FLAGS_RELEASE "-O2")
set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0")
set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")
set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O2")

if(RAJA_ENABLE_COVERAGE)
if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
message(INFO "Coverage analysis enabled")
set(RAJA_EXTRA_NVCC_FLAGS ${RAJA_EXTRA_NVCC_FLAGS}; -Xcompiler -coverage; -Xlinker -coverage)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -coverage -Xlinker -coverage")
set(CMAKE_EXE_LINKER_FLAGS "-coverage ${CMAKE_EXE_LINKER_FLAGS}")
else()
message(WARNING "Code coverage specified but not enabled -- GCC was not detected")
endif()
endif()
endif()
set(RAJA_NVCC_FLAGS ${RAJA_EXTRA_NVCC_FLAGS} CACHE STRING "")
set(CUDA_NVCC_FLAGS ${RAJA_NVCC_FLAGS})
set(CUDA_NVCC_FLAGS_RELEASE ${RAJA_NVCC_FLAGS_RELEASE})
set(CUDA_NVCC_FLAGS_DEBUG ${RAJA_NVCC_FLAGS_DEBUG})
set(CUDA_NVCC_FLAGS_MINSIZEREL ${RAJA_NVCC_FLAGS_MINSIZEREL})
set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ${RAJA_NVCC_FLAGS_RELWITHDEBINFO})
endif()
# end RAJA_ENABLE_CUDA section

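
For host-config files written against the removed RAJA_NVCC_FLAGS* cache variables, the hunk above suggests that per-build-type nvcc options now go through the standard CMake CUDA flag variables. A hedged migration sketch (an interpretation of this hunk, not official guidance):

    # Previously (removed): set(RAJA_NVCC_FLAGS_RELWITHDEBINFO -g; -lineinfo; -O2 CACHE STRING "")
    # Now, per-build-type nvcc options are carried by the standard CMake variables:
    set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O2" CACHE STRING "")
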
18 changes: 18 additions & 0 deletions cmake/thirdparty/FindCUB.cmake
@@ -0,0 +1,18 @@
include (FindPackageHandleStandardArgs)

find_path(CUB_INCLUDE_DIRS
NAMES cub/cub.cuh
HINTS
${CUB_DIR}/
${CUB_DIR}/include)

find_package_handle_standard_args(
CUB
DEFAULT_MSG
CUB_INCLUDE_DIRS)

if (CUB_INCLUDE_DIRS)
set(CUB_FOUND True)
else ()
set(CUB_FOUND False)
endif()
2 changes: 1 addition & 1 deletion docs/sphinx/user_guide/conf.py
@@ -58,7 +58,7 @@

# General information about the project.
project = u'RAJA'
copyright = u'2016-2018'
copyright = u'2016-2019'
author = u'LLNS'

# The version info for the project you're documenting, acts as replacement for
86 changes: 75 additions & 11 deletions docs/sphinx/user_guide/config_options.rst
@@ -48,18 +48,27 @@ Following CMake conventions, RAJA supports three build types: ``Release``,
choose a build type that includes debug information, you do not have to specify
the '-g' compiler flag to generate debugging symbols.

All RAJA options are set like standard CMake variables. For example, to enable
RAJA OpenMP functionality, pass the following argument to cmake::
All RAJA options are set like standard CMake variables. All RAJA settings for
default options, compilers, flags for optimization, etc. can be found in files
in the ``RAJA/cmake`` directory. Configuration variables can be set by passing
arguments to CMake on the command line when CMake is called, or by setting
options in a CMake cache file and passing that file to CMake. For example,
to enable RAJA OpenMP functionality, pass the following argument to cmake::

-DENABLE_OPENMP=On

All RAJA settings for default options, compilers, flags for optimization, etc.
can be found in files in the ``RAJA/cmake`` directory. Next, we
summarize the available options and their defaults
The RAJA repository contains a collection of CMake cache files
(or 'host-config' files) that may be used as a guide for users trying
to set their own options. See :ref:`configopt-raja-hostconfig-label`.

=================================
Available Options and Defaults
=================================
Next, we summarize RAJA options and their defaults.


.. _configopt-raja-features-label:

====================================
Available RAJA Options and Defaults
====================================

RAJA uses a variety of custom variables to control how it is compiled. Many
of these are used internally to control RAJA compilation and do
@@ -96,7 +105,7 @@ and their default settings:
ENABLE_WARNINGS_AS_ERRORS Off
========================= ======================

* **Programming models and compilers**
* **Programming model back-ends**

Variables that control which RAJA programming model back-ends are enabled
are (names are descriptive of what they enable):
@@ -127,8 +136,8 @@ and their default settings:
for RAJA CUDA scans. Since the CUB library is included in RAJA as a
Git submodule, users should not have to set this in most scenarios.

.. note:: When using the NVIDIA nvcc compiler for RAJA CUDA functionality,
the variable 'RAJA_NVCC_FLAGS' should be used to pass flags to nvcc.
.. note:: See :ref:`configopt-raja-backends-label` for more information about
setting compiler flags and other options for RAJA back-ends.

* **Data types, sizes, alignment, etc.**

@@ -281,6 +290,61 @@ and their default settings:
recovery overhead, etc.)
============================= ========================================


.. _configopt-raja-backends-label:

===============================
Setting RAJA Back-End Features
===============================

To access compiler and hardware optimization features, it is often necessary
to pass options to a compiler. This section describes how to do this and
which CMake variables to use in certain cases.

* **OpenMP Compiler Options**

The variable `OpenMP_CXX_FLAGS` is used to pass OpenMP-related flags to a
compiler. Option syntax follows the CMake *list* pattern. Here is an example
showing how to specify OpenMP target back-end options for the clang compiler
as a CMake option::

cmake \
....
-DOpenMP_CXX_FLAGS="-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda;-fopenmp-implicit-declare-target"
....

* **CUDA Compiler Options**

When using the NVIDIA nvcc compiler for RAJA CUDA functionality, the variables
`CMAKE_CUDA_FLAGS_RELEASE`, `CMAKE_CUDA_FLAGS_DEBUG`, and
`CMAKE_CUDA_FLAGS_RELWITHDEBINFO` (corresponding to the standard CMake build
types) are used to pass flags to nvcc.

.. note:: When nvcc must pass options to the host compiler, the arguments
can be included in these CMake variables. Each host compiler
option must be prepended with the `-Xcompiler` directive.

To set the CUDA architecture level for the nvcc compiler, which should be
chosen based on the NVIDIA GPU hardware you are using, you can use the
`CUDA_ARCH` CMake variable. For example, the CMake option::

-DCUDA_ARCH=sm_60

will tell the compiler to use the `sm_60` SASS architecture in its second
stage of compilation. It will pick the PTX architecture to use in the first
stage of compilation that is suitable for the SASS architecture you specify.

Alternatively, you may specify the PTX and SASS architectures, using
appropriate nvcc options in the `CMAKE_CUDA_FLAGS_*` variables.

.. note:: RAJA requires a minimum CUDA architecture level of `sm_35` to use
all supported CUDA features. Mostly, the architecture level affects
which RAJA CUDA atomic operations are available and how they are
implemented inside RAJA. This is described in :ref:`atomics-label`.

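Putting the CUDA-related variables together, a host-config fragment might
look like the following sketch (the values shown are examples, not
requirements, and the host-compiler option is arbitrary)::

  set(ENABLE_CUDA On CACHE BOOL "")
  set(CUDA_ARCH "sm_60" CACHE STRING "")
  # Host-compiler options are forwarded to nvcc via -Xcompiler:
  set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O2 -Xcompiler -fno-omit-frame-pointer" CACHE STRING "")
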

.. _configopt-raja-hostconfig-label:

=======================
RAJA Host-Config Files
=======================
28 changes: 28 additions & 0 deletions docs/sphinx/user_guide/feature/atomic.rst
@@ -37,6 +37,7 @@ RAJA atomic support includes a variety of the most common atomic operations.
* Each method described in the table below returns the value of
the potentially modified argument (i.e., \*acc) immediately before
the atomic operation is applied, in case it is needed by a user.
* See :ref:`cudaatomics-label` for details about CUDA atomic operations.

^^^^^^^^^^^
Arithmetic
@@ -131,3 +132,30 @@ Atomic Policies

For more information about available RAJA atomic policies, please see
:ref:`atomicpolicy-label`.


.. _cudaatomics-label:

---------------------------------------
CUDA Atomics Architecture Dependencies
---------------------------------------

The internal implementations of RAJA atomic operations may vary depending
on which CUDA architecture is available and/or specified when RAJA
is configured for compilation. The following rules apply, depending on the
CUDA architecture level chosen:

* **CUDA architecture is lower than `sm_35`**

* Certain atomics will be implemented using CUDA `atomicCAS`
(Compare and Swap).

* **CUDA architecture is `sm_35` or higher**

* CUDA native 64-bit unsigned atomicMin, atomicMax, atomicAnd, atomicOr,
atomicXor are used.

* **CUDA architecture is `sm_60` or higher**

* CUDA native 64-bit double `atomicAdd` is used.

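As a configuration sketch only (the value is an example, not a requirement),
requesting `sm_60` or newer in a host-config enables the native double
`atomicAdd` path described above::

  set(CUDA_ARCH "sm_60" CACHE STRING "Compute architecture to pass to CUDA builds")
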