Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[InstCombine] Enable select freeze poison folding when storing value #129776

Merged
merged 1 commit into from
Mar 20, 2025

Conversation

jmciver
Copy link
Contributor

@jmciver jmciver commented Mar 4, 2025

The non-freeze poison argument to select can be one of the following: global,
constant, and noundef arguments.

Alive2 test validation: https://alive2.llvm.org/ce/z/jbtCS6

Copy link
Contributor Author

jmciver commented Mar 4, 2025

@llvmbot
Copy link
Member

llvmbot commented Mar 5, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-llvm-ir

Author: John McIver (jmciver)

Changes

The non-freeze poison argument to select can be one of the following: global,
constant, and noundef arguments.

Alive2 test validation: https://alive2.llvm.org/ce/z/jbtCS6


Full diff: https://github.com/llvm/llvm-project/pull/129776.diff

3 Files Affected:

  • (modified) llvm/include/llvm/IR/PatternMatch.h (+13)
  • (modified) llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (+16-6)
  • (modified) llvm/test/Transforms/InstCombine/select.ll (+3-6)
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index b3eeb1d7ba88a..c66235f33cd9e 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -3124,6 +3124,19 @@ inline auto m_c_LogicalOp(const LHS &L, const RHS &R) {
   return m_LogicalOp<LHS, RHS, /*Commutable=*/true>(L, R);
 }
 
+struct GuaranteedNotToBeUndefOrPoison_match {
+  template <typename ITy> bool match(ITy *V) {
+    if (auto *AsValue = dyn_cast<Value>(V))
+      return isGuaranteedNotToBeUndefOrPoison(AsValue);
+    else
+      return false;
+  }
+};
+
+inline GuaranteedNotToBeUndefOrPoison_match m_guaranteedNotToBeUndefOrPoison() {
+  return GuaranteedNotToBeUndefOrPoison_match();
+}
+
 } // end namespace PatternMatch
 } // end namespace llvm
 
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4c14dcfb4d75f..6b9254692a857 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -4813,15 +4813,22 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
   // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
   //       duplicating logic for binops at least.
   auto getUndefReplacement = [&I](Type *Ty) {
-    Constant *BestValue = nullptr;
-    Constant *NullValue = Constant::getNullValue(Ty);
+    Value *BestValue = nullptr;
+    Value *NullValue = Constant::getNullValue(Ty);
     for (const auto *U : I.users()) {
-      Constant *C = NullValue;
+      Value *C = NullValue;
       if (match(U, m_Or(m_Value(), m_Value())))
         C = ConstantInt::getAllOnesValue(Ty);
       else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
         C = ConstantInt::getTrue(Ty);
-
+      else if (I.hasOneUse() &&
+               match(U, m_c_Select(m_Specific(&I),
+                                   m_guaranteedNotToBeUndefOrPoison()))) {
+        if (match(U->getOperand(1), m_Specific(&I)))
+          C = U->getOperand(2);
+        else
+          C = U->getOperand(1);
+      }
       if (!BestValue)
         BestValue = C;
       else if (BestValue != C)
@@ -4842,8 +4849,11 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
 
   Constant *C;
   if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
-    Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
-    return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC));
+    Value *Replace = getUndefReplacement(I.getType()->getScalarType());
+    if (Constant *ReplaceC = dyn_cast<Constant>(Replace))
+      return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC));
+    else
+      return replaceInstUsesWith(I, Replace);
   }
 
   // Replace uses of Op with freeze(Op).
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index cc25f4ce24d9a..789d98197d878 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -4904,8 +4904,7 @@ define i32 @src_simplify_2x_at_once_and(i32 %x, i32 %y) {
 
 define void @select_freeze_poison_parameter(ptr noundef %addr.src, ptr %addr.tgt, i1 %cond) {
 ; CHECK-LABEL: @select_freeze_poison_parameter(
-; CHECK-NEXT:    [[ADDR_SRC:%.*]] = select i1 [[COND:%.*]], ptr [[ADDR_SRC1:%.*]], ptr null
-; CHECK-NEXT:    store ptr [[ADDR_SRC]], ptr [[ADDR_TGT:%.*]], align 8
+; CHECK-NEXT:    store ptr [[ADDR_SRC:%.*]], ptr [[ADDR_TGT:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %freeze = freeze ptr poison
@@ -4918,8 +4917,7 @@ define void @select_freeze_poison_parameter(ptr noundef %addr.src, ptr %addr.tgt
 
 define void @select_freeze_poison_global(ptr %addr.tgt, i1 %cond) {
 ; CHECK-LABEL: @select_freeze_poison_global(
-; CHECK-NEXT:    [[SELECT_ADDR:%.*]] = select i1 [[COND:%.*]], ptr @glb, ptr null
-; CHECK-NEXT:    store ptr [[SELECT_ADDR]], ptr [[ADDR_TGT:%.*]], align 8
+; CHECK-NEXT:    store ptr @glb, ptr [[ADDR_TGT:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %freeze = freeze ptr poison
@@ -4930,8 +4928,7 @@ define void @select_freeze_poison_global(ptr %addr.tgt, i1 %cond) {
 
 define void @select_freeze_poison_constant(ptr %addr.tgt, i1 %cond) {
 ; CHECK-LABEL: @select_freeze_poison_constant(
-; CHECK-NEXT:    [[SELECT_ADDR:%.*]] = select i1 [[COND:%.*]], i32 72, i32 0
-; CHECK-NEXT:    store i32 [[SELECT_ADDR]], ptr [[ADDR_TGT:%.*]], align 4
+; CHECK-NEXT:    store i32 72, ptr [[ADDR_TGT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %freeze = freeze i32 poison

@jmciver jmciver requested review from dmpots and dtcxzyw March 5, 2025 04:24
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can do this fold in InstSimplify: https://alive2.llvm.org/ce/z/Dm53TP

However, we should wait for the following things before working on more simplifications with freeze poison:

  1. Remove freeze poison -> null canonicalization in InstCombine
  2. Replace existing undef handling logic with freeze poison

See also #119884 (comment)

@nikic
Copy link
Contributor

nikic commented Mar 5, 2025

We can do this fold in InstSimplify: https://alive2.llvm.org/ce/z/Dm53TP

The transform is only valid if the freeze(poison) is one-use. And I don't think that InstSimplify should be doing any use-based checks. So I think InstCombine is the right place for it.

@jmciver
Copy link
Contributor Author

jmciver commented Mar 6, 2025

@nikic and @dtcxzyw thanks for the feedback.

This patch does bypasses the need for freeze poison -> null canonicalization removal in InstCombine. To provide context I am seeing the lack of store of select freeze poison folding in uninitialized memory semantics work that I am doing with @nlopes.

I would like to incorporate this and then once freeze poison -> null canonicalization is removed from InstCombine refactor appropriately. Would this be acceptable?

@dtcxzyw
Copy link
Member

dtcxzyw commented Mar 6, 2025

I would like to incorporate this and then once freeze poison -> null canonicalization is removed from InstCombine refactor appropriately. Would this be acceptable?

I don't mean to block this patch. I just worry that these patches may not be well tested (fuzzers/compile-time tracker/llvm-opt-benchmark) until we remove the canonicalization.

@jmciver
Copy link
Contributor Author

jmciver commented Mar 6, 2025

@dtcxzyw I was not aware I should be using a fuzzer. What tool would you recommend?

From a correctness standpoint the match is relatively narrow and I have discussed with @nlopes extensively. The patch has been tested with bootstrap build of LLVM running all regressions. Additionally I have used the llvm-test-suite for correctness checks.

I have sent a request to @nikita asking for compile-time tracker access and can report results once available.

I also have used the patch in conjunction with 15+ Phoronix benchmarks. The optimization involving the fold of global is something I have seen in our memory semantics work in testing FFTW.

@nunoplopes
Copy link
Member

FWIW, we have been using this patch internally and it helps substancial in a couple of benchmarks.

Base automatically changed from users/jmciver/select-freeze-poison-1 to main March 6, 2025 09:35
@jmciver jmciver force-pushed the users/jmciver/select-freeze-poison-2 branch from 99f2168 to 97d6bec Compare March 7, 2025 03:58
@jmciver
Copy link
Contributor Author

jmciver commented Mar 7, 2025

Performance benchmarks are queued; I will post once available.

@@ -4842,8 +4847,11 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {

Constant *C;
if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add some vector tests?

define <2 x i8> @src(i1 %cond, <2 x i8> noundef %y) {
  %undef = freeze <2 x i8> <i8 0, i8 poison>
  %sel = select i1 %cond, <2 x i8> %y, <2 x i8> %undef
  ret <2 x i8> %sel
}

define <2 x i8> @tgt(i1 %cond, <2 x i8> noundef %y) {
  ret <2 x i8> %y
}

It is a miscompilation: https://alive2.llvm.org/ce/z/8Q3XYH

Copy link
Member

@dtcxzyw dtcxzyw Mar 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, as it is not suitable to handle this fold under getUndefReplacement, can we move the logic into InstCombinerImpl::visitSelect?

Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
...
    if (match(TrueVal, m_OneUse(m_Freeze(m_Poison()))) &&
        isGuaranteedNotToBeUndefOrPoison(FalseVal, &AC, &I, &DT))
      return replaceInstUsesWith(FalseVal);
    if (match(FalseVal, m_OneUse(m_Freeze(m_Poison()))) &&
        isGuaranteedNotToBeUndefOrPoison(TrueVal, &AC, &I, &DT))
      return replaceInstUsesWith(TrueVal);

Copy link
Contributor Author

@jmciver jmciver Mar 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing out the miscompilation!

The reason I did not place this logic into InstCombinerImpl::visitSelect was the canonical form produced by InstCombinerImpl::visitFreeze occurs first and thus ruins the match on freeze poison when visitation is performed on the select instruction.

I experimented a bit and derived a two match expression in the lambda function.

Added basic vector tests. Alive2 validation: https://alive2.llvm.org/ce/z/YFA-5S

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the canonical form produced by InstCombinerImpl::visitFreeze occurs first and thus ruins the match on freeze poison when visitation is performed on the select instruction.

Should we remove these canonicalizations first?

@jmciver jmciver force-pushed the users/jmciver/select-freeze-poison-2 branch from 97d6bec to 892c18e Compare March 8, 2025 04:58
@jmciver jmciver force-pushed the users/jmciver/select-freeze-poison-2 branch from 892c18e to 790de08 Compare March 10, 2025 04:40
@jmciver jmciver force-pushed the users/jmciver/select-freeze-poison-2 branch from 790de08 to dcf20b2 Compare March 13, 2025 05:48
@jmciver
Copy link
Contributor Author

jmciver commented Mar 13, 2025

I refactored the select fold into helper getValueReplacement, which can make use of the freeze argument test in the caller: match(Op0, m_Undef()). The isGuaranteedNotToBeUndefOrPoison call is located in the lambda body. Multi-use
freeze instruction are not common so the execution impact should be generally low, but the readability is improved.

Copy link

github-actions bot commented Mar 13, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@jmciver jmciver force-pushed the users/jmciver/select-freeze-poison-2 branch from dcf20b2 to cd63f3a Compare March 13, 2025 05:57
@jmciver jmciver force-pushed the users/jmciver/select-freeze-poison-2 branch from cd63f3a to b1f9e87 Compare March 14, 2025 05:36
@jmciver
Copy link
Contributor Author

jmciver commented Mar 14, 2025

@nikita I refactored the replacement of poison elements in constant vectors to use the first non-poison element. I am not sure if I need to do something for aggregates.

@jmciver jmciver force-pushed the users/jmciver/select-freeze-poison-2 branch from b1f9e87 to 4b22bc2 Compare March 18, 2025 04:19
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some nits, but the general approach looks good to me now.

auto getUndefReplacement = [&I](Type *Ty) {
Constant *BestValue = nullptr;
Constant *NullValue = Constant::getNullValue(Ty);
auto getUndefReplacement = [&I, &AC = this->AC, &DT = this->DT](Type *Ty) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
auto getUndefReplacement = [&I, &AC = this->AC, &DT = this->DT](Type *Ty) {
auto getUndefReplacement = [&](Type *Ty) {

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I meant here is to use [&] to capture everything instead of listing all the variables. You can still use AC and DT if you do that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My mistake; I will fix that.

@jmciver jmciver force-pushed the users/jmciver/select-freeze-poison-2 branch from 4b22bc2 to d3abfc1 Compare March 19, 2025 20:48
@jmciver
Copy link
Contributor Author

jmciver commented Mar 19, 2025

Update as per reviews. Add constant expression vector tests.

@jmciver jmciver force-pushed the users/jmciver/select-freeze-poison-2 branch 2 times, most recently from 15b33e4 to 7878416 Compare March 19, 2025 22:29
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LG

Copy link
Contributor Author

jmciver commented Mar 20, 2025

Merge activity

  • Mar 19, 11:44 PM EDT: A user started a stack merge that includes this pull request via Graphite.
  • Mar 19, 11:47 PM EDT: Graphite rebased this pull request as part of a merge.
  • Mar 19, 11:49 PM EDT: A user merged this pull request with Graphite.

The non-freeze poison argument to select can be one of the following: global,
constant, and noundef arguments.

Alive2 test validation:
- https://alive2.llvm.org/ce/z/jbtCS6
- https://alive2.llvm.org/ce/z/YFA-5S

Refactor replacement of poison elements in constant vectors to use the first
non-poison element.
@jmciver jmciver force-pushed the users/jmciver/select-freeze-poison-2 branch from 7878416 to af0ad2f Compare March 20, 2025 03:47
@jmciver jmciver merged commit 9cab82f into main Mar 20, 2025
6 of 10 checks passed
@jmciver jmciver deleted the users/jmciver/select-freeze-poison-2 branch March 20, 2025 03:49
@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 20, 2025

LLVM Buildbot has detected a new failure on builder openmp-offload-amdgpu-runtime running on omp-vega20-0 while building llvm at step 7 "Add check check-offload".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/18017

Here is the relevant piece of the build log for the reference
Step 7 (Add check check-offload) failure: test (failure)
******************** TEST 'libomptarget :: amdgcn-amd-amdhsa :: offloading/gpupgo/pgo1.c' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/clang -fopenmp    -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib  -fopenmp-targets=amdgcn-amd-amdhsa /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo1.c -o /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo1.c.tmp /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a -fcreate-profile      -Xarch_device -fprofile-generate
# executed command: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/clang -fopenmp -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -fopenmp-targets=amdgcn-amd-amdhsa /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo1.c -o /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo1.c.tmp /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a -fcreate-profile -Xarch_device -fprofile-generate
# note: command had no output on stdout or stderr
# RUN: at line 3
env LLVM_PROFILE_FILE=pgo1.c.llvm.profraw      /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo1.c.tmp 2>&1
# executed command: env LLVM_PROFILE_FILE=pgo1.c.llvm.profraw /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo1.c.tmp
# note: command had no output on stdout or stderr
# RUN: at line 5
llvm-profdata show --all-functions --counts      amdgcn-amd-amdhsa.pgo1.c.llvm.profraw |      /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo1.c --check-prefix="LLVM-PGO"
# executed command: llvm-profdata show --all-functions --counts amdgcn-amd-amdhsa.pgo1.c.llvm.profraw
# note: command had no output on stdout or stderr
# executed command: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo1.c --check-prefix=LLVM-PGO
# note: command had no output on stdout or stderr
# RUN: at line 9
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/clang -fopenmp    -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib  -fopenmp-targets=amdgcn-amd-amdhsa /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo1.c -o /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo1.c.tmp /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a -fcreate-profile      -Xarch_device -fprofile-instr-generate
# executed command: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/clang -fopenmp -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -fopenmp-targets=amdgcn-amd-amdhsa /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo1.c -o /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo1.c.tmp /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a -fcreate-profile -Xarch_device -fprofile-instr-generate
# note: command had no output on stdout or stderr
# RUN: at line 11
env LLVM_PROFILE_FILE=pgo1.c.clang.profraw      /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo1.c.tmp 2>&1
# executed command: env LLVM_PROFILE_FILE=pgo1.c.clang.profraw /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo1.c.tmp
# note: command had no output on stdout or stderr
# RUN: at line 13
llvm-profdata show --all-functions --counts      amdgcn-amd-amdhsa.pgo1.c.clang.profraw |      /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo1.c --check-prefix="CLANG-PGO"
# executed command: llvm-profdata show --all-functions --counts amdgcn-amd-amdhsa.pgo1.c.clang.profraw
# note: command had no output on stdout or stderr
# executed command: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo1.c --check-prefix=CLANG-PGO
# .---command stderr------------
# | /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo1.c:63:15: error: CLANG-PGO: expected string not found in input
# | // CLANG-PGO: Block counts: [11, 20]
# |               ^
# | <stdin>:5:19: note: scanning from here
# |  Function count: 0
# |                   ^
# | <stdin>:6:2: note: possible intended match here
# |  Block counts: [12, 20]
# |  ^
# | 
# | Input file: <stdin>
# | Check file: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo1.c
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 20, 2025

LLVM Buildbot has detected a new failure on builder openmp-offload-libc-amdgpu-runtime running on omp-vega20-1 while building llvm at step 7 "Add check check-offload".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/14936

Here is the relevant piece of the build log for the reference
Step 7 (Add check check-offload) failure: test (failure)
******************** TEST 'libomptarget :: amdgcn-amd-amdhsa :: offloading/gpupgo/pgo2.c' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/clang -fopenmp    -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib  -fopenmp-targets=amdgcn-amd-amdhsa /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c -o /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp -Xoffload-linker -lc -Xoffload-linker -lm /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a -fprofile-generate
# executed command: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/clang -fopenmp -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib -fopenmp-targets=amdgcn-amd-amdhsa /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c -o /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp -Xoffload-linker -lc -Xoffload-linker -lm /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a -fprofile-generate
# RUN: at line 2
env LLVM_PROFILE_FILE=pgo2.c.llvm.profraw      /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp 2>&1
# executed command: env LLVM_PROFILE_FILE=pgo2.c.llvm.profraw /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp
# RUN: at line 4
llvm-profdata show --all-functions --counts      pgo2.c.llvm.profraw | /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c      --check-prefix="LLVM-HOST"
# executed command: llvm-profdata show --all-functions --counts pgo2.c.llvm.profraw
# executed command: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c --check-prefix=LLVM-HOST
# RUN: at line 7
llvm-profdata show --all-functions --counts      amdgcn-amd-amdhsa.pgo2.c.llvm.profraw      | /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c --check-prefix="LLVM-DEVICE"
# executed command: llvm-profdata show --all-functions --counts amdgcn-amd-amdhsa.pgo2.c.llvm.profraw
# executed command: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c --check-prefix=LLVM-DEVICE
# RUN: at line 11
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/clang -fopenmp    -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib  -fopenmp-targets=amdgcn-amd-amdhsa /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c -o /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp -Xoffload-linker -lc -Xoffload-linker -lm /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a -fprofile-instr-generate
# executed command: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/clang -fopenmp -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib -fopenmp-targets=amdgcn-amd-amdhsa /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c -o /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp -Xoffload-linker -lc -Xoffload-linker -lm /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a -fprofile-instr-generate
# RUN: at line 12
env LLVM_PROFILE_FILE=pgo2.c.clang.profraw      /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp 2>&1
# executed command: env LLVM_PROFILE_FILE=pgo2.c.clang.profraw /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp
# RUN: at line 14
llvm-profdata show --all-functions --counts      pgo2.c.clang.profraw | /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c      --check-prefix="CLANG-HOST"
# executed command: llvm-profdata show --all-functions --counts pgo2.c.clang.profraw
# executed command: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c --check-prefix=CLANG-HOST
# RUN: at line 17
llvm-profdata show --all-functions --counts      amdgcn-amd-amdhsa.pgo2.c.clang.profraw |      /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c --check-prefix="CLANG-DEV"
# executed command: llvm-profdata show --all-functions --counts amdgcn-amd-amdhsa.pgo2.c.clang.profraw
# executed command: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c --check-prefix=CLANG-DEV
# .---command stderr------------
# | /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c:101:15: error: CLANG-DEV: expected string not found in input
# | // CLANG-DEV: Block counts: [11]
# |               ^
# | <stdin>:5:19: note: scanning from here
# |  Function count: 0
# |                   ^
# | <stdin>:6:2: note: possible intended match here
# |  Block counts: [14]
# |  ^
# | 
# | Input file: <stdin>
# | Check file: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants