diff --git a/CMakeLists.txt b/CMakeLists.txt index f5972a2..05f4aab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,8 +25,8 @@ project( DESCRIPTION "GPU memory management and data representation library") # Options -option(BUILD_TESTS "Build the test suite" ON) -option(BUILD_BENCHMARKS "Build the benchmark suite" ON) +option(CUCASCADE_BUILD_TESTS "Build the test suite" ON) +option(CUCASCADE_BUILD_BENCHMARKS "Build the benchmark suite" ON) option(CUCASCADE_BUILD_SHARED_LIBS "Build shared library" ON) option(CUCASCADE_BUILD_STATIC_LIBS "Build static library" ON) @@ -60,7 +60,7 @@ endif() # ============================================================================= # Compiler warnings # ============================================================================= -option(WARNINGS_AS_ERRORS "Treat compiler warnings as errors" ON) +option(CUCASCADE_WARNINGS_AS_ERRORS "Treat compiler warnings as errors" ON) # Warning flags for C/C++ set(CUCASCADE_CXX_WARNING_FLAGS @@ -118,16 +118,14 @@ target_include_directories(cucascade_objects # Link dependencies to object library target_link_libraries(cucascade_objects PUBLIC ${CUCASCADE_PUBLIC_LINK_LIBS}) -# Language standards set via properties (not compile features) to avoid -# propagating cuda_std_* requirements to consumers without CUDA enabled. -set_target_properties(cucascade_objects PROPERTIES CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON - CXX_EXTENSIONS OFF - CUDA_STANDARD 20 - CUDA_STANDARD_REQUIRED ON) +# cxx_std_20 is PUBLIC so consumers know they need C++20 to use our headers. +# cuda_std_20 is kept separate and applied PRIVATE to cucascade_objects to avoid +# propagating it to consumers that have no CUDA compiler requirement. +target_compile_features(cucascade_objects PUBLIC cxx_std_20) +target_compile_features(cucascade_objects PRIVATE cuda_std_20) -# Position independent code (required for shared library) -set_target_properties(cucascade_objects PROPERTIES POSITION_INDEPENDENT_CODE ON) +set_target_properties(cucascade_objects PROPERTIES CXX_EXTENSIONS OFF + POSITION_INDEPENDENT_CODE ON) # ============================================================================= # Static library @@ -139,8 +137,7 @@ if(CUCASCADE_BUILD_STATIC_LIBS) target_include_directories(cucascade_static PUBLIC ${CUCASCADE_PUBLIC_INCLUDE_DIRS}) target_link_libraries(cucascade_static PUBLIC ${CUCASCADE_PUBLIC_LINK_LIBS}) - target_compile_features(cucascade_static - PUBLIC ${CUCASCADE_PUBLIC_COMPILE_FEATURES}) + target_compile_features(cucascade_static PUBLIC cxx_std_20) set_target_properties( cucascade_static PROPERTIES OUTPUT_NAME cucascade EXPORT_NAME @@ -157,8 +154,7 @@ if(CUCASCADE_BUILD_SHARED_LIBS) target_include_directories(cucascade_shared PUBLIC ${CUCASCADE_PUBLIC_INCLUDE_DIRS}) target_link_libraries(cucascade_shared PUBLIC ${CUCASCADE_PUBLIC_LINK_LIBS}) - target_compile_features(cucascade_shared - PUBLIC ${CUCASCADE_PUBLIC_COMPILE_FEATURES}) + target_compile_features(cucascade_shared PUBLIC cxx_std_20) set_target_properties( cucascade_shared @@ -239,7 +235,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/cuCascadeConfig.cmake # ============================================================================= # Tests # ============================================================================= -if(BUILD_TESTS) +if(CUCASCADE_BUILD_TESTS) enable_testing() add_subdirectory(test) endif() @@ -247,6 +243,6 @@ endif() # ============================================================================= # Benchmarks # ============================================================================= -if(BUILD_BENCHMARKS) +if(CUCASCADE_BUILD_BENCHMARKS) add_subdirectory(benchmark) endif() diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 0696cea..94f202c 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -45,8 +45,6 @@ set(BENCHMARK_SOURCES # Create benchmark executable add_executable(cucascade_benchmarks ${BENCHMARK_SOURCES}) -set_target_properties(cucascade_benchmarks PROPERTIES CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON) # Set include directories target_include_directories( diff --git a/include/cucascade/memory/small_pinned_host_memory_resource.hpp b/include/cucascade/memory/small_pinned_host_memory_resource.hpp index 54b6877..e13d11e 100644 --- a/include/cucascade/memory/small_pinned_host_memory_resource.hpp +++ b/include/cucascade/memory/small_pinned_host_memory_resource.hpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -137,9 +138,17 @@ class small_pinned_host_memory_resource : public rmm::mr::device_memory_resource std::vector owned_allocations_; }; +// rmm::detail::polyfill::async_resource_with was removed in RMM 26.x; +// use cuda::mr::resource_with directly for newer versions. +#if RMM_VERSION_MAJOR >= 26 +static_assert(cuda::mr::resource_with); +#else static_assert(rmm::detail::polyfill::async_resource_with); +#endif } // namespace memory } // namespace cucascade diff --git a/src/data/representation_converter.cpp b/src/data/representation_converter.cpp index 54a23e6..8790078 100644 --- a/src/data/representation_converter.cpp +++ b/src/data/representation_converter.cpp @@ -508,12 +508,12 @@ static memory::column_metadata plan_column_copy(const cudf::column_view& col, memory::column_metadata offsets_meta{}; offsets_meta.type_id = cudf::type_id::INT32; offsets_meta.num_rows = 1; // one offset value (0) for 0 strings - offsets_meta.null_count = 0; + offsets_meta.null_count = 0; offsets_meta.has_null_mask = false; - offsets_meta.has_data = true; - offsets_meta.data_size = sizeof(int32_t); - current_offset = align_up_fast(current_offset, 8u); - offsets_meta.data_offset = current_offset; + offsets_meta.has_data = true; + offsets_meta.data_size = sizeof(int32_t); + current_offset = align_up_fast(current_offset, 8u); + offsets_meta.data_offset = current_offset; current_offset += offsets_meta.data_size; offsets_meta.is_synthetic_empty_offsets = true; meta.children.push_back(std::move(offsets_meta)); @@ -558,11 +558,12 @@ static void collect_d2h_ops(const void* src, } /** - * @brief Zero a region in the host allocation (used for synthetic STRING offsets with no device source). + * @brief Zero a region in the host allocation (used for synthetic STRING offsets with no device + * source). */ static void zero_region(memory::fixed_multiple_blocks_allocation& alloc, - std::size_t alloc_offset, - std::size_t size) + std::size_t alloc_offset, + std::size_t size) { if (size == 0 || !alloc || alloc->size() == 0) { return; } const std::size_t block_size = alloc->block_size(); @@ -787,9 +788,7 @@ std::unique_ptr convert_host_fast_to_gpu( { auto& fast_source = source.cast(); const auto& fast_table = fast_source.get_host_table(); - if (!fast_table) { - throw std::runtime_error("convert_host_fast_to_gpu: host table is null"); - } + if (!fast_table) { throw std::runtime_error("convert_host_fast_to_gpu: host table is null"); } if (!fast_table->allocation) { throw std::runtime_error("convert_host_fast_to_gpu: host table allocation is null"); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1c187c3..b0eef1e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -47,12 +47,8 @@ set(TEST_SOURCES # Create test executable add_executable(cucascade_tests ${TEST_SOURCES}) -set_target_properties(cucascade_tests PROPERTIES CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 20 - CUDA_STANDARD_REQUIRED ON - CUDA_ARCHITECTURES - "${CMAKE_CUDA_ARCHITECTURES}") +set_target_properties(cucascade_tests PROPERTIES CUDA_STANDARD 20 + CUDA_STANDARD_REQUIRED ON) # Set include directories target_include_directories(