Skip to content

Commit 3d9409f

Browse files
authored
[NVPTX] Make ctor/dtor lowering always enabled in NVPTX (llvm#126544)
Summary: Currently we conditionally enable NVPTX lowering depending on the language (C/C++/OpenMP). Unfortunately this causes problems because this option is only present if the backend was enabled, which causes this to error if you try to make LLVM-IR. This patch instead makes it the only accepted lowering. The reason we had it as opt-in before is because it is not handled by CUDA. So, this pach also introduces diagnostics to prevent *all* creation of device-side global constructors and destructors. We already did this for variables, now we do it for attributes as well. This inverts the responsibility of blocking this from the backend to the langauage like it should be given that support for this is language dependent.
1 parent f332455 commit 3d9409f

File tree

12 files changed

+28
-78
lines changed

12 files changed

+28
-78
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9193,6 +9193,8 @@ def err_cuda_device_exceptions : Error<
91939193
def err_dynamic_var_init : Error<
91949194
"dynamic initialization is not supported for "
91959195
"__device__, __constant__, __shared__, and __managed__ variables">;
9196+
def err_cuda_ctor_dtor_attrs
9197+
: Error<"CUDA does not support global %0 for __device__ functions">;
91969198
def err_shared_var_init : Error<
91979199
"initialization is not supported for __shared__ variables">;
91989200
def err_cuda_vla : Error<

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -639,9 +639,6 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
639639
CmdArgs.push_back(
640640
Args.MakeArgString("--plugin-opt=-mattr=" + llvm::join(Features, ",")));
641641

642-
// Enable ctor / dtor lowering for the direct / freestanding NVPTX target.
643-
CmdArgs.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
644-
645642
// Add paths for the default clang library path.
646643
SmallString<256> DefaultLibPath =
647644
llvm::sys::path::parent_path(TC.getDriver().Dir);
@@ -726,9 +723,8 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
726723
/// toolchain.
727724
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
728725
const llvm::Triple &HostTriple,
729-
const ArgList &Args, bool Freestanding = false)
730-
: ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args),
731-
Freestanding(Freestanding) {
726+
const ArgList &Args)
727+
: ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
732728
if (CudaInstallation.isValid())
733729
getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
734730
// Lookup binaries into the driver directory, this is used to
@@ -740,8 +736,7 @@ NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
740736
/// system's default triple if not provided.
741737
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
742738
const ArgList &Args)
743-
: NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args,
744-
/*Freestanding=*/true) {}
739+
: NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args) {}
745740

746741
llvm::opt::DerivedArgList *
747742
NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
@@ -782,13 +777,7 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
782777

783778
void NVPTXToolChain::addClangTargetOptions(
784779
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
785-
Action::OffloadKind DeviceOffloadingKind) const {
786-
// If we are compiling with a standalone NVPTX toolchain we want to try to
787-
// mimic a standard environment as much as possible. So we enable lowering
788-
// ctor / dtor functions to global symbols that can be registered.
789-
if (Freestanding && !getDriver().isUsingLTO())
790-
CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
791-
}
780+
Action::OffloadKind DeviceOffloadingKind) const {}
792781

793782
bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
794783
const Option &O = A->getOption();

clang/lib/Driver/ToolChains/Cuda.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,8 @@ namespace toolchains {
132132
class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
133133
public:
134134
NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
135-
const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args,
136-
bool Freestanding);
135+
const llvm::Triple &HostTriple,
136+
const llvm::opt::ArgList &Args);
137137

138138
NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
139139
const llvm::opt::ArgList &Args);
@@ -179,9 +179,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
179179
protected:
180180
Tool *buildAssembler() const override; // ptxas.
181181
Tool *buildLinker() const override; // nvlink.
182-
183-
private:
184-
bool Freestanding = false;
185182
};
186183

187184
class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7484,6 +7484,15 @@ void Sema::ProcessDeclAttributeList(
74847484
}
74857485
}
74867486

7487+
// Do not permit 'constructor' or 'destructor' attributes on __device__ code.
7488+
if (getLangOpts().CUDAIsDevice && D->hasAttr<CUDADeviceAttr>() &&
7489+
(D->hasAttr<ConstructorAttr>() || D->hasAttr<DestructorAttr>()) &&
7490+
!getLangOpts().GPUAllowDeviceInit) {
7491+
Diag(D->getLocation(), diag::err_cuda_ctor_dtor_attrs)
7492+
<< (D->hasAttr<ConstructorAttr>() ? "constructors" : "destructors");
7493+
D->setInvalidDecl();
7494+
}
7495+
74877496
// Do this check after processing D's attributes because the attribute
74887497
// objc_method_family can change whether the given method is in the init
74897498
// family, and it can be applied after objc_designated_initializer. This is a

clang/test/Driver/cuda-cross-compiling.c

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -57,19 +57,6 @@
5757

5858
// LINK: clang-nvlink-wrapper{{.*}}"-o" "a.out" "-arch" "sm_61"{{.*}}[[CUBIN:.+]].o
5959

60-
//
61-
// Test to ensure that we enable handling global constructors in a freestanding
62-
// Nvidia compilation.
63-
//
64-
// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 %s -### 2>&1 \
65-
// RUN: | FileCheck -check-prefix=LOWERING %s
66-
// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 -flto -c %s -### 2>&1 \
67-
// RUN: | FileCheck -check-prefix=LOWERING-LTO %s
68-
69-
// LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
70-
// LOWERING: clang-nvlink-wrapper{{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
71-
// LOWERING-LTO-NOT: "--nvptx-lower-global-ctor-dtor"
72-
7360
//
7461
// Test passing arguments directly to nvlink.
7562
//

clang/test/SemaCUDA/device-var-init.cu

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,3 +485,12 @@ void instantiate() {
485485
bar<NontrivialInitializer><<<1, 1>>>();
486486
// expected-note@-1 {{in instantiation of function template specialization 'bar<NontrivialInitializer>' requested here}}
487487
}
488+
489+
__device__ void *ptr1 = nullptr;
490+
__device__ void *ptr2 = ptr1;
491+
// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
492+
493+
__device__ [[gnu::constructor(101)]] void ctor() {}
494+
// expected-error@-1 {{CUDA does not support global constructors for __device__ functions}}
495+
__device__ [[gnu::destructor(101)]] void dtor() {}
496+
// expected-error@-1 {{CUDA does not support global destructors for __device__ functions}}

libc/cmake/modules/LLVMLibCTestRules.cmake

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -560,14 +560,12 @@ function(add_integration_test test_name)
560560
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
561561
target_link_options(${fq_build_target_name} PRIVATE
562562
${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
563-
-Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
564-
"-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
563+
-Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto -nostdlib -static
565564
"-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}")
566565
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
567566
target_link_options(${fq_build_target_name} PRIVATE
568567
${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
569568
"-Wl,--suppress-stack-size-warning" -Wno-multi-gpu
570-
"-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1"
571569
"-Wl,-mllvm,-nvptx-emit-init-fini-kernel"
572570
-march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static
573571
"--cuda-path=${LIBC_CUDA_ROOT}")

libcxx/test/configs/nvptx-libc++-shared.cfg.in

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@ config.substitutions.append(('%{link_flags}',
1010
'-nostdlib++ -startfiles -stdlib '
1111
'-L %{lib-dir} -lc++ -lc++abi '
1212
'-Wl,--suppress-stack-size-warning '
13-
'-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1 '
14-
'-Wl,-mllvm,-nvptx-emit-init-fini-kernel'
1513
))
1614
config.substitutions.append(('%{exec}',
1715
'%{executor} --no-parallelism'

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,6 @@
9191

9292
using namespace llvm;
9393

94-
static cl::opt<bool>
95-
LowerCtorDtor("nvptx-lower-global-ctor-dtor",
96-
cl::desc("Lower GPU ctor / dtors to globals on the device."),
97-
cl::init(false), cl::Hidden);
98-
9994
#define DEPOTNAME "__local_depot"
10095

10196
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
@@ -794,22 +789,6 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
794789
if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
795790
report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
796791

797-
// OpenMP supports NVPTX global constructors and destructors.
798-
bool IsOpenMP = M.getModuleFlag("openmp") != nullptr;
799-
800-
if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
801-
!LowerCtorDtor && !IsOpenMP) {
802-
report_fatal_error(
803-
"Module has a nontrivial global ctor, which NVPTX does not support.");
804-
return true; // error
805-
}
806-
if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
807-
!LowerCtorDtor && !IsOpenMP) {
808-
report_fatal_error(
809-
"Module has a nontrivial global dtor, which NVPTX does not support.");
810-
return true; // error
811-
}
812-
813792
// We need to call the parent's one explicitly.
814793
bool Result = AsmPrinter::doInitialization(M);
815794

llvm/test/CodeGen/NVPTX/global-ctor.ll

Lines changed: 0 additions & 9 deletions
This file was deleted.

llvm/test/CodeGen/NVPTX/global-dtor.ll

Lines changed: 0 additions & 9 deletions
This file was deleted.

llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
; Make sure we get the same result if we run multiple times
1010
; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s
11-
; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
11+
; RUN: llc -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
1212

1313
@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
1414
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]

0 commit comments

Comments
 (0)