From 11dcbd1b11453acaa146e2c362f4cdf2dfcacf5c Mon Sep 17 00:00:00 2001 From: Pradeep Garigipati Date: Wed, 11 Mar 2026 13:29:58 +0530 Subject: [PATCH 1/5] Make cmake option prefix consistent for all options --- CMakeLists.txt | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f5972a2..8bf191e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,8 +25,8 @@ project( DESCRIPTION "GPU memory management and data representation library") # Options -option(BUILD_TESTS "Build the test suite" ON) -option(BUILD_BENCHMARKS "Build the benchmark suite" ON) +option(CUCASCADE_BUILD_TESTS "Build the test suite" ON) +option(CUCASCADE_BUILD_BENCHMARKS "Build the benchmark suite" ON) option(CUCASCADE_BUILD_SHARED_LIBS "Build shared library" ON) option(CUCASCADE_BUILD_STATIC_LIBS "Build static library" ON) @@ -60,7 +60,7 @@ endif() # ============================================================================= # Compiler warnings # ============================================================================= -option(WARNINGS_AS_ERRORS "Treat compiler warnings as errors" ON) +option(CUCASCADE_WARNINGS_AS_ERRORS "Treat compiler warnings as errors" ON) # Warning flags for C/C++ set(CUCASCADE_CXX_WARNING_FLAGS @@ -120,11 +120,13 @@ target_link_libraries(cucascade_objects PUBLIC ${CUCASCADE_PUBLIC_LINK_LIBS}) # Language standards set via properties (not compile features) to avoid # propagating cuda_std_* requirements to consumers without CUDA enabled. -set_target_properties(cucascade_objects PROPERTIES CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON - CXX_EXTENSIONS OFF - CUDA_STANDARD 20 - CUDA_STANDARD_REQUIRED ON) +set_target_properties( + cucascade_objects + PROPERTIES CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON + CXX_EXTENSIONS OFF + CUDA_STANDARD 20 + CUDA_STANDARD_REQUIRED ON) # Position independent code (required for shared library) set_target_properties(cucascade_objects PROPERTIES POSITION_INDEPENDENT_CODE ON) From 39bb001104c4f7f6b8d98de67db89de806099287 Mon Sep 17 00:00:00 2001 From: Pradeep Garigipati Date: Wed, 11 Mar 2026 14:09:54 +0530 Subject: [PATCH 2/5] Make CXX/CUDA language standards part of public/private interface, respectively --- CMakeLists.txt | 23 +++++++++++------------ benchmark/CMakeLists.txt | 2 -- test/CMakeLists.txt | 11 +++++------ 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bf191e..a8e0145 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,6 +111,11 @@ set(CUCASCADE_PUBLIC_INCLUDE_DIRS set(CUCASCADE_PUBLIC_LINK_LIBS rmm::rmm cudf::cudf CUDA::cudart CUDA::nvml Threads::Threads ${NUMA_LIB}) +# cxx_std_20 is PUBLIC so consumers know they need C++20 to use our headers. +# cuda_std_20 is kept separate and applied PRIVATE to cucascade_objects to avoid +# propagating it to consumers that have no CUDA compiler requirement. +set(CUCASCADE_PUBLIC_COMPILE_FEATURES cxx_std_20) + # Set include directories for the object library target_include_directories(cucascade_objects PUBLIC ${CUCASCADE_PUBLIC_INCLUDE_DIRS}) @@ -118,18 +123,12 @@ target_include_directories(cucascade_objects # Link dependencies to object library target_link_libraries(cucascade_objects PUBLIC ${CUCASCADE_PUBLIC_LINK_LIBS}) -# Language standards set via properties (not compile features) to avoid -# propagating cuda_std_* requirements to consumers without CUDA enabled. -set_target_properties( - cucascade_objects - PROPERTIES CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON - CXX_EXTENSIONS OFF - CUDA_STANDARD 20 - CUDA_STANDARD_REQUIRED ON) - -# Position independent code (required for shared library) -set_target_properties(cucascade_objects PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_compile_features(cucascade_objects + PUBLIC ${CUCASCADE_PUBLIC_COMPILE_FEATURES}) +target_compile_features(cucascade_objects PRIVATE cuda_std_20) + +set_target_properties(cucascade_objects PROPERTIES CXX_EXTENSIONS OFF + POSITION_INDEPENDENT_CODE ON) # ============================================================================= # Static library diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 0696cea..94f202c 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -45,8 +45,6 @@ set(BENCHMARK_SOURCES # Create benchmark executable add_executable(cucascade_benchmarks ${BENCHMARK_SOURCES}) -set_target_properties(cucascade_benchmarks PROPERTIES CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON) # Set include directories target_include_directories( diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1c187c3..5426368 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -47,12 +47,11 @@ set(TEST_SOURCES # Create test executable add_executable(cucascade_tests ${TEST_SOURCES}) -set_target_properties(cucascade_tests PROPERTIES CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 20 - CUDA_STANDARD_REQUIRED ON - CUDA_ARCHITECTURES - "${CMAKE_CUDA_ARCHITECTURES}") +set_target_properties( + cucascade_tests + PROPERTIES CUDA_STANDARD 20 + CUDA_STANDARD_REQUIRED ON + CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}") # Set include directories target_include_directories( From 894c9c8c779763e1dde172e3db6090679a452ca9 Mon Sep 17 00:00:00 2001 From: Pradeep Garigipati Date: Wed, 11 Mar 2026 14:13:02 +0530 Subject: [PATCH 3/5] Fix static_assert for RMM 26.x: replace removed polyfill namespace rmm::detail::polyfill::async_resource_with was removed in RMM 26.x. Use cuda::mr::resource_with directly for RMM >= 26, and keep the polyfill path for older versions. --- .../memory/small_pinned_host_memory_resource.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/cucascade/memory/small_pinned_host_memory_resource.hpp b/include/cucascade/memory/small_pinned_host_memory_resource.hpp index 54b6877..e13d11e 100644 --- a/include/cucascade/memory/small_pinned_host_memory_resource.hpp +++ b/include/cucascade/memory/small_pinned_host_memory_resource.hpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -137,9 +138,17 @@ class small_pinned_host_memory_resource : public rmm::mr::device_memory_resource std::vector owned_allocations_; }; +// rmm::detail::polyfill::async_resource_with was removed in RMM 26.x; +// use cuda::mr::resource_with directly for newer versions. +#if RMM_VERSION_MAJOR >= 26 +static_assert(cuda::mr::resource_with); +#else static_assert(rmm::detail::polyfill::async_resource_with); +#endif } // namespace memory } // namespace cucascade From 02ec97c1c600583af0f5758d4031b5c8cd9d6568 Mon Sep 17 00:00:00 2001 From: Pradeep Garigipati Date: Wed, 11 Mar 2026 14:14:27 +0530 Subject: [PATCH 4/5] Fix some unrelated code formatting --- src/data/representation_converter.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/data/representation_converter.cpp b/src/data/representation_converter.cpp index 54a23e6..8790078 100644 --- a/src/data/representation_converter.cpp +++ b/src/data/representation_converter.cpp @@ -508,12 +508,12 @@ static memory::column_metadata plan_column_copy(const cudf::column_view& col, memory::column_metadata offsets_meta{}; offsets_meta.type_id = cudf::type_id::INT32; offsets_meta.num_rows = 1; // one offset value (0) for 0 strings - offsets_meta.null_count = 0; + offsets_meta.null_count = 0; offsets_meta.has_null_mask = false; - offsets_meta.has_data = true; - offsets_meta.data_size = sizeof(int32_t); - current_offset = align_up_fast(current_offset, 8u); - offsets_meta.data_offset = current_offset; + offsets_meta.has_data = true; + offsets_meta.data_size = sizeof(int32_t); + current_offset = align_up_fast(current_offset, 8u); + offsets_meta.data_offset = current_offset; current_offset += offsets_meta.data_size; offsets_meta.is_synthetic_empty_offsets = true; meta.children.push_back(std::move(offsets_meta)); @@ -558,11 +558,12 @@ static void collect_d2h_ops(const void* src, } /** - * @brief Zero a region in the host allocation (used for synthetic STRING offsets with no device source). + * @brief Zero a region in the host allocation (used for synthetic STRING offsets with no device + * source). */ static void zero_region(memory::fixed_multiple_blocks_allocation& alloc, - std::size_t alloc_offset, - std::size_t size) + std::size_t alloc_offset, + std::size_t size) { if (size == 0 || !alloc || alloc->size() == 0) { return; } const std::size_t block_size = alloc->block_size(); @@ -787,9 +788,7 @@ std::unique_ptr convert_host_fast_to_gpu( { auto& fast_source = source.cast(); const auto& fast_table = fast_source.get_host_table(); - if (!fast_table) { - throw std::runtime_error("convert_host_fast_to_gpu: host table is null"); - } + if (!fast_table) { throw std::runtime_error("convert_host_fast_to_gpu: host table is null"); } if (!fast_table->allocation) { throw std::runtime_error("convert_host_fast_to_gpu: host table allocation is null"); } From f015fbcd84d37961d1666dca1505c423bd26709c Mon Sep 17 00:00:00 2001 From: Pradeep Garigipati Date: Wed, 11 Mar 2026 14:45:23 +0530 Subject: [PATCH 5/5] Fix missed option rename checks --- CMakeLists.txt | 21 ++++++++------------- test/CMakeLists.txt | 7 ++----- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a8e0145..05f4aab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,11 +111,6 @@ set(CUCASCADE_PUBLIC_INCLUDE_DIRS set(CUCASCADE_PUBLIC_LINK_LIBS rmm::rmm cudf::cudf CUDA::cudart CUDA::nvml Threads::Threads ${NUMA_LIB}) -# cxx_std_20 is PUBLIC so consumers know they need C++20 to use our headers. -# cuda_std_20 is kept separate and applied PRIVATE to cucascade_objects to avoid -# propagating it to consumers that have no CUDA compiler requirement. -set(CUCASCADE_PUBLIC_COMPILE_FEATURES cxx_std_20) - # Set include directories for the object library target_include_directories(cucascade_objects PUBLIC ${CUCASCADE_PUBLIC_INCLUDE_DIRS}) @@ -123,8 +118,10 @@ target_include_directories(cucascade_objects # Link dependencies to object library target_link_libraries(cucascade_objects PUBLIC ${CUCASCADE_PUBLIC_LINK_LIBS}) -target_compile_features(cucascade_objects - PUBLIC ${CUCASCADE_PUBLIC_COMPILE_FEATURES}) +# cxx_std_20 is PUBLIC so consumers know they need C++20 to use our headers. +# cuda_std_20 is kept separate and applied PRIVATE to cucascade_objects to avoid +# propagating it to consumers that have no CUDA compiler requirement. +target_compile_features(cucascade_objects PUBLIC cxx_std_20) target_compile_features(cucascade_objects PRIVATE cuda_std_20) set_target_properties(cucascade_objects PROPERTIES CXX_EXTENSIONS OFF @@ -140,8 +137,7 @@ if(CUCASCADE_BUILD_STATIC_LIBS) target_include_directories(cucascade_static PUBLIC ${CUCASCADE_PUBLIC_INCLUDE_DIRS}) target_link_libraries(cucascade_static PUBLIC ${CUCASCADE_PUBLIC_LINK_LIBS}) - target_compile_features(cucascade_static - PUBLIC ${CUCASCADE_PUBLIC_COMPILE_FEATURES}) + target_compile_features(cucascade_static PUBLIC cxx_std_20) set_target_properties( cucascade_static PROPERTIES OUTPUT_NAME cucascade EXPORT_NAME @@ -158,8 +154,7 @@ if(CUCASCADE_BUILD_SHARED_LIBS) target_include_directories(cucascade_shared PUBLIC ${CUCASCADE_PUBLIC_INCLUDE_DIRS}) target_link_libraries(cucascade_shared PUBLIC ${CUCASCADE_PUBLIC_LINK_LIBS}) - target_compile_features(cucascade_shared - PUBLIC ${CUCASCADE_PUBLIC_COMPILE_FEATURES}) + target_compile_features(cucascade_shared PUBLIC cxx_std_20) set_target_properties( cucascade_shared @@ -240,7 +235,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/cuCascadeConfig.cmake # ============================================================================= # Tests # ============================================================================= -if(BUILD_TESTS) +if(CUCASCADE_BUILD_TESTS) enable_testing() add_subdirectory(test) endif() @@ -248,6 +243,6 @@ endif() # ============================================================================= # Benchmarks # ============================================================================= -if(BUILD_BENCHMARKS) +if(CUCASCADE_BUILD_BENCHMARKS) add_subdirectory(benchmark) endif() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5426368..b0eef1e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -47,11 +47,8 @@ set(TEST_SOURCES # Create test executable add_executable(cucascade_tests ${TEST_SOURCES}) -set_target_properties( - cucascade_tests - PROPERTIES CUDA_STANDARD 20 - CUDA_STANDARD_REQUIRED ON - CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}") +set_target_properties(cucascade_tests PROPERTIES CUDA_STANDARD 20 + CUDA_STANDARD_REQUIRED ON) # Set include directories target_include_directories(