Skip to content

CUB: ill-formed class-head name in device_transform.cuh breaks GCC (CUDA 13.2) #8833

@mc-nv

Description

@mc-nv

Bug type

Compile-time Error

Affected component

CUB

Bug description

Commit 1e3b0b4 (PR #6844, "Avoid use of cccl namespace macros in cub") removed the
_CCCL_BEGIN_NAMESPACE_CUDA / _CCCL_END_NAMESPACE_CUDA macros and replaced them with
explicit inline qualification. The resulting code at cub/device/device_transform.cuh:44
uses ::cuda:: as the leading qualifier of the class-head name in a template specialization:

struct ::cuda::proclaims_copyable_arguments<CUB_NS_QUALIFIER::detail::__return_constant<T>>
    : ::cuda::std::true_type

This is ill-formed C++. The C++ standard forbids a globally-qualified name (::cuda::)
as the class-head name of a struct/class definition or template specialization. The leading
:: is valid in expression and type-specifier contexts, but not in a class-head name.

NVCC on x86 silently accepts this as a non-conforming extension, but GCC on ARM/SBSA
treats it as a hard error, breaking any downstream project that includes <cub/cub.cuh>
when building with CUDA 13.2+ on SBSA.

Error message

cub/device/device_transform.cuh:44:134:
error: global qualification of class name is invalid before ':' token
   44 | struct ::cuda::proclaims_copyable_arguments<...> : ::cuda::std::true_type
      |                                                                          ^
error: expected '{' before ':' token

Reproducer

Run the shell script below — it creates two CMake projects and a Dockerfile, then
launches docker build using the exact image that ships the buggy headers.
The good project compiles cleanly; the bad project fails with the GCC error above.

#!/bin/bash
# Reproducer for the globally-qualified class-head name in
# cub/device/device_transform.cuh.
#
# Creates two minimal CMake projects under /tmp (one "good", one "bad") plus a
# Dockerfile, then runs `docker build` with /tmp as the build context. The
# "good" project is built first and is expected to succeed; the "bad" project
# is built last and is expected to fail with GCC's
# "global qualification of class name is invalid" error, which in turn makes
# `docker build` (and therefore this script) exit non-zero.
set -euo pipefail

# Start from a clean slate so stale files never end up in the build context.
rm -rf /tmp/cccl_reproducer_bad /tmp/cccl_reproducer_good
mkdir -p /tmp/cccl_reproducer_bad /tmp/cccl_reproducer_good

# --- "bad" translation unit: class-head name starts with :: -----------------
cat << 'REPRODUCER_BAD_EOF' > /tmp/cccl_reproducer_bad/reproducer.cpp
#include <type_traits>

namespace cuda {
    template <typename T>
    struct proclaims_copyable_arguments : std::false_type {};
}
namespace detail {
    template <typename T> struct __return_constant {};
}

// rejected by GCC: leading :: in the class-head name
// mirrors cub/device/device_transform.cuh:44 after commit 1e3b0b4
template <typename T>
struct ::cuda::proclaims_copyable_arguments<detail::__return_constant<T>>
    : std::true_type {};

int main() {}
REPRODUCER_BAD_EOF

# --- "good" translation unit: same specialization without the leading :: ----
cat << 'REPRODUCER_GOOD_EOF' > /tmp/cccl_reproducer_good/reproducer.cpp
#include <type_traits>

namespace cuda {
    template <typename T>
    struct proclaims_copyable_arguments : std::false_type {};
}
namespace detail {
    template <typename T> struct __return_constant {};
}

// accepted: relative qualification in class-head name
// the fix: drop the leading :: from the class-head name
template <typename T>
struct cuda::proclaims_copyable_arguments<detail::__return_constant<T>>
    : std::true_type {};

int main() {}
REPRODUCER_GOOD_EOF

# --- CMake projects (identical apart from the project name) -----------------
cat << 'CMAKE_EOF' > /tmp/cccl_reproducer_bad/CMakeLists.txt
cmake_minimum_required(VERSION 3.20)
project(cccl_reproducer_bad CXX)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
message(STATUS "Compiler: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
find_package(CUDA REQUIRED)
add_executable(reproducer reproducer.cpp)
CMAKE_EOF

cat << 'CMAKE_GOOD_EOF' > /tmp/cccl_reproducer_good/CMakeLists.txt
cmake_minimum_required(VERSION 3.20)
project(cccl_reproducer_good CXX)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
message(STATUS "Compiler: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
find_package(CUDA REQUIRED)
add_executable(reproducer reproducer.cpp)
CMAKE_GOOD_EOF

# --- Dockerfile --------------------------------------------------------------
# The heredoc delimiter is quoted, so nothing below is expanded by this script;
# $(...) and the \033 escapes are evaluated by the shell inside the container.
cat > /tmp/Dockerfile.cccl_reproducer << 'DOCKERFILE_EOF'
FROM nvcr.io/nvidia/cuda-dl-base:26.04-cuda13.2-devel-ubuntu24.04

COPY ./cccl_reproducer_good /cccl_reproducer_good
COPY ./cccl_reproducer_bad  /cccl_reproducer_bad

RUN apt-get update -qq && apt-get install -y cmake build-essential

# Show the offending header lines as shipped in the image. printf is used
# instead of `echo -e` because RUN executes under /bin/sh, whose echo does not
# portably support -e; the excerpt is passed as a %s argument so backslashes
# in the header are printed verbatim.
RUN printf '\033[32m ==Device Transform Cuh:40-50== \033[0m\n' \
 && printf '\033[33m %s \033[0m\n' "$(sed -n '40,50p' /usr/local/cuda/include/cccl/cub/device/device_transform.cuh)"

# Expected to succeed.
WORKDIR /cccl_reproducer_good
RUN cmake -B /cccl_build_good -S /cccl_reproducer_good && cmake --build /cccl_build_good

# Expected to fail with the GCC error quoted in this report.
WORKDIR /cccl_reproducer_bad
RUN cmake -B /cccl_build_bad -S /cccl_reproducer_bad && cmake --build /cccl_build_bad
DOCKERFILE_EOF

# Build context is /tmp so the COPY instructions can see both project directories.
docker build --progress=plain -t cccl_reproducer -f /tmp/Dockerfile.cccl_reproducer /tmp

Suggested fix

- struct ::cuda::proclaims_copyable_arguments<CUB_NS_QUALIFIER::detail::__return_constant<T>>
+ struct cuda::proclaims_copyable_arguments<CUB_NS_QUALIFIER::detail::__return_constant<T>>
      : ::cuda::std::true_type

Same fix applies to any other class-head names introduced by the same commit
that use ::cuda:: or ::cuda::std:: as the leading qualifier.

Environment

CCCL version: introduced in commit 1e3b0b4, present in CUDA 13.2
Affected file: cub/device/device_transform.cuh:44
Failing compiler: GCC 13+
Reproducer image: nvcr.io/nvidia/cuda-dl-base:26.04-cuda13.2-devel-ubuntu24.04

Metadata

Metadata

Labels

No labels
No labels

Type

No type

Projects

Status

Done

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions