Compile DALI with Clang #2416

Merged: 14 commits, Nov 30, 2020
Changes from all commits
84 changes: 71 additions & 13 deletions CMakeLists.txt
@@ -13,6 +13,7 @@
# limitations under the License.

cmake_minimum_required(VERSION 3.13)

# allow usage of check_symbol_exists() macro
include(CheckCXXSymbolExists)
include(CheckCXXCompilerFlag)
@@ -26,6 +27,7 @@ if (POLICY CMP0075)
endif()

project(DALI CUDA CXX C)

set(DALI_ROOT ${PROJECT_SOURCE_DIR})
set(CUDA_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")

@@ -44,6 +46,7 @@ cmake_dependent_option(BUILD_TEST "Build googletest test suite" ON
cmake_dependent_option(BUILD_BENCHMARK "Build benchmark suite" ON
"NOT BUILD_DALI_NODEPS" OFF)
option(BUILD_FUZZING "Build fuzzing suite" OFF)

# if BUILD_NVTX is empty, remove it and let it be the default
if ("${BUILD_NVTX}" STREQUAL "")
unset(BUILD_NVTX CACHE)
@@ -123,6 +126,21 @@ cmake_dependent_option(STATIC_LIBS "Build static libraries instead of shared-obj
option(VERBOSE_LOGS "Adds verbose logging to DALI" OFF)
option(WERROR "Treat all warnings as errors" OFF)

cmake_dependent_option(DALI_CLANG_ONLY "Compile DALI using only Clang. Suitable only for development."
OFF "CMAKE_CXX_COMPILER_ID STREQUAL Clang" OFF)

if (DALI_CLANG_ONLY AND BUILD_NVDEC)
message(STATUS "NVDEC is not supportet when compiling only with Clang. Setting BUILD_NVDEC to OFF.")
set(BUILD_NVDEC OFF)
endif()

message(STATUS "DALI_CLANG_ONLY -- ${DALI_CLANG_ONLY}")

if (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
message(FATAL_ERROR "Expected CUDA compiler to be set to nvcc. Clang-only build is supported via DALI_CLANG_ONLY
which requires setting Clang as C and CXX compilers and leaving nvcc as CUDA compiler.")
endif()
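
For reference, the configuration this guard expects looks roughly like the following (an illustrative sketch; the nvcc path is an assumption, not taken from this PR):

    CC=clang CXX=clang++ cmake -DDALI_CLANG_ONLY=ON \
        -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc ..

Clang is the C/CXX compiler (and, with DALI_CLANG_ONLY=ON, also compiles the .cu files), while nvcc stays registered as the CMake CUDA compiler.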

# ; creates a list here
set (PYTHON_VERSIONS "3.6;3.7;3.8;3.9")

@@ -219,13 +237,26 @@ endif()
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-register -Wsign-compare")

# CUDA does not support current clang as host compiler, we need to use gcc
# TODO(klecki): Plethora of warnings that should be addressed as a follow-up
if (DALI_CLANG_ONLY)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-switch-bool -Wno-sign-compare -Wno-missing-braces -Wno-absolute-value")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format -Wno-inconsistent-missing-override -Wno-implicit-int-float-conversion")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-mismatched-tags -Wno-reorder-ctor -Wno-unused-command-line-argument")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-defaulted-function-deleted -Wno-switch -Wno-cuda-compat -Wno-unused-private-field")
endif()

# CUDA does not support current Clang as host compiler, we need to use gcc
# CMAKE_CUDA_HOST_COMPILER variable operates on paths
set(CUDA_UNSUPPORTED_COMPILER 0)
if ("${CMAKE_CUDA_HOST_COMPILER}" MATCHES "clang")
set(CUDA_UNSUPPORTED_COMPILER 1)
elseif (CMAKE_CUDA_HOST_COMPILER STREQUAL "")
set(CUDA_UNSUPPORTED_COMPILER 1)
if (NOT DALI_CLANG_ONLY)
if ("${CMAKE_CUDA_HOST_COMPILER}" MATCHES "clang")
set(CUDA_UNSUPPORTED_COMPILER 1)
elseif (CMAKE_CUDA_HOST_COMPILER STREQUAL "")
set(CUDA_UNSUPPORTED_COMPILER 1)
endif()

set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_C_COMPILER})

endif()

if(${CUDA_UNSUPPORTED_COMPILER})
@@ -239,31 +270,58 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
endif()
endif()


message(STATUS "CUDA Compiler: ${CMAKE_CUDA_COMPILER}")

# OpenMP SIMD support
if(CXX_HAVE_OMP_SIMD)
if(CXX_HAVE_OMP_SIMD AND NOT DALI_CLANG_ONLY)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp-simd")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas")
endif()

# Discover which architectures the CUDA compiler supports
CUDA_find_supported_arch_values(CUDA_supported_archs ${CUDA_known_archs})
if (DALI_CLANG_ONLY)
set(USED_CUDA_COMPILER ${CMAKE_CXX_COMPILER})
else()
set(USED_CUDA_COMPILER ${CMAKE_CUDA_COMPILER})
endif()


message(STATUS "CUDA .cu files compiler: ${USED_CUDA_COMPILER}")

CUDA_find_supported_arch_values(CUDA_supported_archs ${USED_CUDA_COMPILER} ${CUDA_known_archs})
message(STATUS "CUDA supported archs: ${CUDA_supported_archs}")

set(CUDA_TARGET_ARCHS_SORTED ${CUDA_TARGET_ARCHS})
list(SORT CUDA_TARGET_ARCHS_SORTED)
CUDA_find_supported_arch_values(CUDA_targeted_archs ${CUDA_TARGET_ARCHS_SORTED})
CUDA_find_supported_arch_values(CUDA_targeted_archs ${USED_CUDA_COMPILER} ${CUDA_TARGET_ARCHS_SORTED})
message(STATUS "CUDA targeted archs: ${CUDA_targeted_archs}")
if (NOT CUDA_targeted_archs)
message(FATAL_ERROR "None of the provided CUDA architectures ({${CUDA_TARGET_ARCHS}}) is supported by nvcc, use one or more of: ${CUDA_supported_archs}")
message(FATAL_ERROR "None of the provided CUDA architectures ({${CUDA_TARGET_ARCHS}})"
" is supported by ${USED_CUDA_COMPILER}, use one or more of: ${CUDA_supported_archs}")
endif()

CUDA_get_gencode_args(CUDA_gencode_flags ${CUDA_targeted_archs})
message(STATUS "Generated gencode flags: ${CUDA_gencode_flags}")
# Add gpu-arch and toolkit flags for Clang when it is used to compile CUDA
if (DALI_CLANG_ONLY)
CUDA_get_gencode_args(CUDA_gencode_flags_clang ${USED_CUDA_COMPILER} ${CUDA_targeted_archs})
message(STATUS "Generated gencode flags for clang: ${CUDA_gencode_flags_clang}")
CUDA_get_toolkit_from_compiler(CUDA_TOOLKIT_PATH_VAR)
message(STATUS "Used CUDA toolkit: ${CUDA_TOOLKIT_PATH_VAR}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --cuda-path=${CUDA_TOOLKIT_PATH_VAR} ${CUDA_gencode_flags_clang}")
endif()
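
As a rough illustration (assuming the toolkit resolves to /usr/local/cuda and the targeted archs are 61 and 70), the flags appended to CMAKE_CXX_FLAGS here would look like:

    --cuda-path=/usr/local/cuda --cuda-gpu-arch=sm_61 --cuda-gpu-arch=sm_70

These flags only affect Clang; the .cu files that are later retagged as CXX sources pick them up, while nvcc never sees them.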

# Add ptx & bin flags for cuda
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_gencode_flags} --compiler-options -fvisibility=hidden --Wno-deprecated-gpu-targets")
# Add ptx & bin flags for cuda compiler (nvcc)
if(USE_CMAKE_CUDA_ARCHITECTURES)
CUDA_get_cmake_cuda_archs(CMAKE_CUDA_ARCHITECTURES ${CUDA_targeted_archs})
message(STATUS "Generated CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
else()
CUDA_get_gencode_args(CUDA_gencode_flags_nvcc ${CMAKE_CUDA_COMPILER} ${CUDA_targeted_archs})
message(STATUS "Generated gencode flags: ${CUDA_gencode_flags_nvcc}")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_gencode_flags_nvcc}")
endif()

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options -fvisibility=hidden --Wno-deprecated-gpu-targets")

# Include directories
include_directories(
104 changes: 85 additions & 19 deletions cmake/CUDA_utils.cmake
@@ -13,6 +13,30 @@
# limitations under the License.


# Use CMAKE_CUDA_COMPILER to obtain the path to the CUDA toolkit.
# Needed when compiling with Clang only.
function(CUDA_get_toolkit_from_compiler TOOLKIT_PATH)
get_filename_component(TOOLKIT_PATH_TMP_VAR "${CMAKE_CUDA_COMPILER}/../.." ABSOLUTE)
set(${TOOLKIT_PATH} ${TOOLKIT_PATH_TMP_VAR} PARENT_SCOPE)
endfunction()
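
A minimal usage sketch (the compiler path is assumed for illustration): with CMAKE_CUDA_COMPILER pointing at /usr/local/cuda-11.0/bin/nvcc, stepping two directories up yields the toolkit root:

    CUDA_get_toolkit_from_compiler(CUDA_TOOLKIT_PATH_VAR)
    message(STATUS "${CUDA_TOOLKIT_PATH_VAR}")   # -> /usr/local/cuda-11.0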

# When compiling CUDA with Clang only (DALI_CLANG_ONLY=ON), we need to change the
# language property of .cu files so that they are handled by the CXX compiler (which will be Clang).
# Setting that property is scoped to the current CMakeLists.txt, so we do this
# just before creating a target.
# Clang will compile files as CUDA based on their extension.
function(adjust_source_file_language_property SOURCES)
if (DALI_CLANG_ONLY)
foreach(File IN LISTS SOURCES)
if(File MATCHES ".*\.cu$")
set_source_files_properties(${File} PROPERTIES LANGUAGE CXX)
endif()
endforeach()
endif()
endfunction()
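
The intended call pattern, mirroring the call sites added later in this PR, is to run it on the source list right before the target is created (a sketch, not an exact excerpt):

    adjust_source_file_language_property("${DALI_SRCS}")
    add_library(dali ${LIBTYPE} ${DALI_SRCS})

Note the quoting: the list has to be passed as a single quoted argument so that foreach(File IN LISTS SOURCES) sees every element rather than just the first one.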



# List of currently used arch values
if (${ARCH} MATCHES "aarch64-")
# aarch64-linux and aarch64-qnx
@@ -36,19 +60,25 @@ endif()
# Equivalent to dry-running preprocessing on /dev/null as a .cu file
# and checking the exit code
# $ nvcc ${flags} --dryrun -E -x cu /dev/null
# or
# $ clang++ ${flags} -E -x cuda /dev/null
#
# @param out_status TRUE iff exit code is 0, FALSE otherwise
# @param compiler the compiler binary (nvcc or clang) to use in the shell invocation
# @param flags flags to check
# @return out_status
function(CUDA_check_nvcc_flag out_status nvcc_bin flags)
set(preprocess_empty_cu_file "--dryrun" "-E" "-x" "cu" "/dev/null")
set(nvcc_command ${flags} ${preprocess_empty_cu_file})
# Run nvcc and check the exit status
execute_process(COMMAND ${nvcc_bin} ${nvcc_command}
function(CUDA_check_cudacc_flag out_status compiler flags)
if (${compiler} MATCHES "clang")
set(preprocess_empty_cu_file "-E" "-x" "cuda" "/dev/null")
else()
set(preprocess_empty_cu_file "--dryrun" "-E" "-x" "cu" "/dev/null")
endif()
set(cudacc_command ${flags} ${preprocess_empty_cu_file})
# Run the compiler and check the exit status
execute_process(COMMAND ${compiler} ${cudacc_command}
RESULT_VARIABLE tmp_out_status
OUTPUT_QUIET
ERROR_QUIET)
ERROR_QUIET
)
if (${tmp_out_status} EQUAL 0)
set(${out_status} TRUE PARENT_SCOPE)
else()
@@ -58,44 +88,80 @@ endfunction()

# Given the list of arch values, check which are supported by
#
# @param out_arch_values_allowed List of arch values supported by nvcc
# @param arch_values_to_check List of values to be checked against nvcc
# @param out_arch_values_allowed List of arch values supported by the specified compiler
# @param compiler What compiler to use for this check
# @param arch_values_to_check List of values to be checked against the specified compiler
# for example: 60;61;70;75
# @return out_arch_values_allowed
function(CUDA_find_supported_arch_values out_arch_values_allowed arch_values_to_check)
function(CUDA_find_supported_arch_values out_arch_values_allowed compiler arch_values_to_check)
# allow the user to pass the list like a normal variable
set(arch_list ${arch_values_to_check} ${ARGN})
set(nvcc "${CMAKE_CUDA_COMPILER}")
foreach(arch IN LISTS arch_list ITEMS)
CUDA_check_nvcc_flag(supported ${nvcc} "-arch=sm_${arch}")
if (${compiler} MATCHES "clang")
CUDA_check_cudacc_flag(supported ${compiler} "--cuda-gpu-arch=sm_${arch}")
else()
CUDA_check_cudacc_flag(supported ${compiler} "-arch=sm_${arch}")
endif()
if (supported)
set(out_list ${out_list} ${arch})
endif()
endforeach(arch)
set(${out_arch_values_allowed} ${out_list} PARENT_SCOPE)
endfunction()
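
A sketch of how the probe is meant to be called (the output variable name and arch values are illustrative):

    CUDA_find_supported_arch_values(supported_archs ${CMAKE_CUDA_COMPILER} 60 61 70 75)
    message(STATUS "supported: ${supported_archs}")   # e.g. 60;61;70;75 on a recent toolkit

Each value is probed independently, so unsupported entries are silently dropped instead of failing the configure step.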

# Generate -gencode arch=compute_XX,code=sm_XX for list of supported arch values
# Generate -gencode arch=compute_XX,code=sm_XX or --cuda-gpu-arch=sm_XX for a list of supported
# arch values, based on the specified compiler.
# List should be sorted in increasing order.
# The last arch value will be repeated as -gencode arch=compute_XX,code=compute_XX
#
# If nvcc is used, the last arch value will be repeated as -gencode arch=compute_XX,code=compute_XX
# to ensure the generation of PTX for the most recent virtual architecture
# and maintain forward compatibility
# and maintain forward compatibility.
#
# @param out_args_string output string containing appropriate CMAKE_CUDA_FLAGS
# @param out_args_string output string containing appropriate CMAKE_CUDA_FLAGS/CMAKE_CXX_FLAGS
# @param compiler What compiler to generate flags for
# @param arch_values list of arch values to use
# @return out_args_string
function(CUDA_get_gencode_args out_args_string arch_values)
function(CUDA_get_gencode_args out_args_string compiler arch_values)
# allow the user to pass the list like a normal variable
set(arch_list ${arch_values} ${ARGN})
set(out "")
foreach(arch IN LISTS arch_list)
if (${compiler} MATCHES "clang")
set(out "${out} --cuda-gpu-arch=sm_${arch}")
else()
set(out "${out} -gencode arch=compute_${arch},code=sm_${arch}")
endif()
endforeach(arch)

if (NOT ${compiler} MATCHES "clang")
# Repeat the last one to ensure the generation of PTX for the most
# recent virtual architecture, for forward compatibility
list(GET arch_list -1 last_arch)
set(out "${out} -gencode arch=compute_${last_arch},code=compute_${last_arch}")
endif()

set(${out_args_string} ${out} PARENT_SCOPE)
endfunction()
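
To make the two branches concrete, a hypothetical input of 60;70 would produce roughly:

    nvcc:  -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_70,code=compute_70
    clang: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_70

Only the nvcc path appends the trailing compute_70 entry, which embeds PTX for the newest requested architecture.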

# Generate a list of xx-real entries for every specified supported architecture.
# The list should be sorted in increasing order.
#
# The last arch will also be repeated as xx-virtual to ensure the generation of PTX for the most
# recent virtual architecture and maintain forward compatibility.
function(CUDA_get_cmake_cuda_archs out_args_list arch_values)
# allow the user to pass the list like a normal variable
set(arch_list ${arch_values} ${ARGN})
set(out "")
foreach(arch IN LISTS arch_list)
set(out "${out};${arch}-real")
endforeach(arch)

# Repeat the last one to ensure the generation of PTX for the most
# recent virtual architecture, for forward compatibility
list(GET arch_list -1 last_arch)
set(out "${out} -gencode arch=compute_${last_arch},code=compute_${last_arch}")
set(${out_args_string} ${out} PARENT_SCOPE)
set(out "${out};${last_arch}-virtual")

set(${out_args_list} ${out} PARENT_SCOPE)
endfunction()
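
For the same hypothetical 60;70 input, the generated value would look roughly like this:

    CUDA_get_cmake_cuda_archs(CMAKE_CUDA_ARCHITECTURES 60 70)
    # CMAKE_CUDA_ARCHITECTURES -> ;60-real;70-real;70-virtual  (leading separator left over from the string building)

i.e. SASS for each listed arch plus PTX for the newest one, expressed in CMAKE_CUDA_ARCHITECTURES syntax.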


2 changes: 2 additions & 0 deletions dali/CMakeLists.txt
@@ -56,6 +56,7 @@ collect_headers(DALI_INST_HDRS PARENT_SCOPE)
collect_sources(DALI_SRCS PARENT_SCOPE)
if (BUILD_PROTOBUF)
set(DALI_PROTO_OBJ $<TARGET_OBJECTS:DALI_PROTO>)
adjust_source_file_language_property("${DALI_SRCS}")
add_library(dali ${LIBTYPE} ${DALI_SRCS} ${DALI_PROTO_OBJ} ${CUDART_LIB})
set_target_properties(dali PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${DALI_LIBRARY_OUTPUT_DIR}")
endif()
@@ -86,6 +87,7 @@ endif()
################################################
if (BUILD_DALI_PIPELINE AND BUILD_TEST)
add_subdirectory(test)
adjust_source_file_language_property("${DALI_TEST_SRCS}")
add_executable(dali_test "${DALI_TEST_SRCS}")

target_link_libraries(dali_test PUBLIC dali dali_core dali_kernels dali_operators ${DALI_LIBS} gtest)
1 change: 1 addition & 0 deletions dali/benchmark/CMakeLists.txt
@@ -41,6 +41,7 @@ if (BUILD_BENCHMARK)
list(APPEND DALI_BENCHMARK_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/caffe2_alexnet_bench.cc")
endif()

adjust_source_file_language_property("${DALI_BENCHMARK_SRCS}")
add_executable(dali_benchmark "${DALI_BENCHMARK_SRCS}")

target_link_libraries(dali_benchmark PRIVATE dali dali_operators benchmark ${DALI_LIBS})
2 changes: 2 additions & 0 deletions dali/core/CMakeLists.txt
@@ -73,6 +73,7 @@ endif()

list(FILTER DALI_CORE_SRCS EXCLUDE REGEX ".*dynlink_cufile.cc")

adjust_source_file_language_property("${DALI_CORE_SRCS}")
add_library(dali_core ${LIBTYPE} ${DALI_CORE_SRCS})
target_include_directories(dali_core PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
target_link_libraries(dali_core PRIVATE dynlink_cuda ${CUDART_LIB})
@@ -89,6 +90,7 @@ configure_file("${DALI_ROOT}/cmake/${lib_exports}.in" "${CMAKE_BINARY_DIR}/${lib
target_link_libraries(dali_core PRIVATE -Wl,--version-script=${CMAKE_BINARY_DIR}/${lib_exports})

if (BUILD_TEST)
adjust_source_file_language_property("${DALI_CORE_TEST_SRCS}")
add_executable(dali_core_test "${DALI_CORE_TEST_SRCS}")
target_link_libraries(dali_core_test PUBLIC dali_core)
target_link_libraries(dali_core_test PRIVATE gtest dynlink_cuda ${DALI_LIBS})
7 changes: 4 additions & 3 deletions dali/core/fast_div_test.cu
@@ -14,6 +14,7 @@

#include "dali/core/fast_div.h" // NOLINT
#include <gtest/gtest.h>
#include <cmath>
#include <random>
#include <iostream>
#include "dali/test/device_test.h"
@@ -275,9 +276,9 @@ TYPED_TEST(FastDivPerf, Perf) {

const int divs_per_thread = 18;

T d1 = max(dist(rng), T(1));
T d2 = max(dist(rng), T(1));
T d3 = max(dist(rng), T(1));
T d1 = std::max(dist(rng), T(1));
T d2 = std::max(dist(rng), T(1));
T d3 = std::max(dist(rng), T(1));

FastDivMod<T><<<1000, 1024>>>(m, d1, d2, d3);
cudaEventRecord(start, 0);
3 changes: 3 additions & 0 deletions dali/kernels/CMakeLists.txt
@@ -34,6 +34,8 @@ collect_sources(DALI_KERNEL_SRCS)
collect_test_sources(DALI_KERNEL_TEST_SRCS)

# cuFFT library
adjust_source_file_language_property("${DALI_KERNEL_SRCS}")
set_source_files_properties("dummy.cu" PROPERTIES LANGUAGE CUDA)
add_library(dali_kernels ${LIBTYPE} ${DALI_KERNEL_SRCS})
target_link_libraries(dali_kernels PUBLIC dali_core)
target_link_libraries(dali_kernels PRIVATE ${CUDA_cufft_static_LIBRARY})
@@ -48,6 +50,7 @@ target_link_libraries(dali_kernels PRIVATE -Wl,--version-script=${CMAKE_BINARY_

if (BUILD_TEST)
# TODO(janton): create a test_utils_lib with dali_test_config.cc and other common utilities
adjust_source_file_language_property("${DALI_KERNEL_TEST_SRCS}")
add_executable(dali_kernel_test
${DALI_KERNEL_TEST_SRCS}
${DALI_ROOT}/dali/test/dali_test_config.cc)
4 changes: 2 additions & 2 deletions dali/kernels/audio/mel_scale/mel_filter_bank_gpu.cu
@@ -291,10 +291,10 @@ void MelFilterBankGpu<T, Dims>::Run(
}

template <typename T, int Dims>
MelFilterBankGpu<T, Dims>::MelFilterBankGpu() = default;
MelFilterBankGpu<T, Dims>::MelFilterBankGpu() {}

template <typename T, int Dims>
MelFilterBankGpu<T, Dims>::~MelFilterBankGpu() = default;
MelFilterBankGpu<T, Dims>::~MelFilterBankGpu() {}


template class MelFilterBankGpu<float, 2>;