Skip to content

Commit 24f88f5

Browse files
committed
[OpenMP] Accept shortened triples for -Xopenmp-target=
This patch builds on the change in D117634, which expanded shortened triples passed in by the user. It adds the same expansion for the `-Xopenmp-target=` flag. Previously, passing `-fopenmp-targets=nvptx64 -Xopenmp-target=nvptx64 <arg>` unintuitively did not forward `<arg>`: the triples did not match, because `nvptx64` in `-fopenmp-targets=` was expanded to `nvptx64-nvidia-cuda` while the value given to `-Xopenmp-target=` was compared unexpanded.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D118495
1 parent fad7e49 commit 24f88f5

File tree

4 files changed

+27
-14
lines changed

4 files changed

+27
-14
lines changed

clang/include/clang/Driver/ToolChain.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,22 @@ class ToolChain {
711711
const llvm::fltSemantics *FPType = nullptr) const {
712712
return llvm::DenormalMode::getIEEE();
713713
}
714+
715+
// We want to expand the shortened versions of the triples passed in to
716+
// the values used for the bitcode libraries.
717+
static llvm::Triple getOpenMPTriple(StringRef TripleStr) {
718+
llvm::Triple TT(TripleStr);
719+
if (TT.getVendor() == llvm::Triple::UnknownVendor ||
720+
TT.getOS() == llvm::Triple::UnknownOS) {
721+
if (TT.getArch() == llvm::Triple::nvptx)
722+
return llvm::Triple("nvptx-nvidia-cuda");
723+
if (TT.getArch() == llvm::Triple::nvptx64)
724+
return llvm::Triple("nvptx64-nvidia-cuda");
725+
if (TT.getArch() == llvm::Triple::amdgcn)
726+
return llvm::Triple("amdgcn-amd-amdhsa");
727+
}
728+
return TT;
729+
}
714730
};
715731

716732
/// Set a ToolChain's effective triple. Reset it when the registration object

clang/lib/Driver/Driver.cpp

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -792,21 +792,9 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
792792
if (HasValidOpenMPRuntime) {
793793
llvm::StringMap<const char *> FoundNormalizedTriples;
794794
for (const char *Val : OpenMPTargets->getValues()) {
795-
llvm::Triple TT(Val);
795+
llvm::Triple TT(ToolChain::getOpenMPTriple(Val));
796796
std::string NormalizedName = TT.normalize();
797797

798-
// We want to expand the shortened versions of the triples passed in to
799-
// the values used for the bitcode libraries for convenience.
800-
if (TT.getVendor() == llvm::Triple::UnknownVendor ||
801-
TT.getOS() == llvm::Triple::UnknownOS) {
802-
if (TT.getArch() == llvm::Triple::nvptx)
803-
TT = llvm::Triple("nvptx-nvidia-cuda");
804-
else if (TT.getArch() == llvm::Triple::nvptx64)
805-
TT = llvm::Triple("nvptx64-nvidia-cuda");
806-
else if (TT.getArch() == llvm::Triple::amdgcn)
807-
TT = llvm::Triple("amdgcn-amd-amdhsa");
808-
}
809-
810798
// Make sure we don't have a duplicate triple.
811799
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
812800
if (Duplicate != FoundNormalizedTriples.end()) {

clang/lib/Driver/ToolChain.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1129,8 +1129,10 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOpenMPTargetArgs(
11291129
A->getOption().matches(options::OPT_Xopenmp_target);
11301130

11311131
if (A->getOption().matches(options::OPT_Xopenmp_target_EQ)) {
1132+
llvm::Triple TT(getOpenMPTriple(A->getValue(0)));
1133+
11321134
// Passing device args: -Xopenmp-target=<triple> -opt=val.
1133-
if (A->getValue(0) == getTripleString())
1135+
if (TT.getTriple() == getTripleString())
11341136
Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
11351137
else
11361138
continue;

clang/test/Driver/openmp-offload-gpu.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,3 +343,10 @@
343343
// RUN: | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s
344344

345345
// SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", "[[SAVE_TEMPS_INPUT1]]"]
346+
347+
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64 -Xopenmp-target=nvptx64 -march=sm_35 \
348+
// RUN: -save-temps -no-canonical-prefixes %s -o openmp-offload-gpu 2>&1 \
349+
// RUN: | FileCheck -check-prefix=TRIPLE %s
350+
351+
// TRIPLE: "-triple" "nvptx64-nvidia-cuda"
352+
// TRIPLE: "-target-cpu" "sm_35"

0 commit comments

Comments
 (0)