-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Metadata] Handle memprof, callsite merging when one is missing. #132106
base: main
Are you sure you want to change the base?
[Metadata] Handle memprof, callsite merging when one is missing. #132106
Conversation
For memprof and callsite metadata we want to pick one deterministically and keep that even if one of them may be missing.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-llvm-transforms Author: Snehasish Kumar (snehasish) ChangesFor memprof and callsite metadata we want to pick one deterministically Full diff: https://github.com/llvm/llvm-project/pull/132106.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 95f0d099aacb5..161c7c875e0eb 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3355,8 +3355,14 @@ static void combineMetadata(Instruction *K, const Instruction *J,
case LLVMContext::MD_invariant_group:
// Preserve !invariant.group in K.
break;
+ // Keep empty cases for mmra, memprof, and callsite to prevent them from
+ // being removed as unknown metadata. The actual merging is handled
+ // separately below.
case LLVMContext::MD_mmra:
- // Combine MMRAs
+ [[fallthrough]];
+ case LLVMContext::MD_memprof:
+ [[fallthrough]];
+ case LLVMContext::MD_callsite:
break;
case LLVMContext::MD_align:
if (!AAOnly && (DoesKMove || !K->hasMetadata(LLVMContext::MD_noundef)))
@@ -3369,14 +3375,6 @@ static void combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind,
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
- case LLVMContext::MD_memprof:
- if (!AAOnly)
- K->setMetadata(Kind, MDNode::getMergedMemProfMetadata(KMD, JMD));
- break;
- case LLVMContext::MD_callsite:
- if (!AAOnly)
- K->setMetadata(Kind, MDNode::getMergedCallsiteMetadata(KMD, JMD));
- break;
case LLVMContext::MD_preserve_access_index:
// Preserve !preserve.access.index in K.
break;
@@ -3420,6 +3418,26 @@ static void combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(LLVMContext::MD_mmra,
MMRAMetadata::combine(K->getContext(), JMMRA, KMMRA));
}
+
+ // Merge memprof metadata.
+ // Handle separately to support cases where only one instruction has the
+ // metadata.
+ auto JMemProf = J->getMetadata(LLVMContext::MD_memprof);
+ auto KMemProf = K->getMetadata(LLVMContext::MD_memprof);
+ if (!AAOnly && (JMemProf || KMemProf)) {
+ K->setMetadata(LLVMContext::MD_memprof,
+ MDNode::getMergedMemProfMetadata(KMemProf, JMemProf));
+ }
+
+ // Merge callsite metadata.
+ // Handle separately to support cases where only one instruction has the
+ // metadata.
+ auto JCallSite = J->getMetadata(LLVMContext::MD_callsite);
+ auto KCallSite = K->getMetadata(LLVMContext::MD_callsite);
+ if (!AAOnly && (JCallSite || KCallSite)) {
+ K->setMetadata(LLVMContext::MD_callsite,
+ MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite));
+ }
}
void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll
index 10c6aeb26ba76..d15eeb7b69fee 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll
@@ -1,5 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-
;; Test to ensure that memprof related metadata is not dropped when
;; instructions are combined. Currently the metadata from the first instruction
;; is kept, which prevents full loss of profile context information.
@@ -32,6 +30,51 @@ if.end: ; preds = %if.else, %if.then
ret ptr %x.0
}
+define dso_local noundef nonnull ptr @_Z9test_leftb(i1 noundef zeroext %b) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z9test_leftb(
+; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]]
+; CHECK-NEXT: ret ptr [[CALL]]
+;
+entry:
+ br i1 %b, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !0, !callsite !3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4)
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
+ ret ptr %x.0
+}
+
+define dso_local noundef nonnull ptr @_Z10test_rightb(i1 noundef zeroext %b) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z10test_rightb(
+; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META4:![0-9]+]], !callsite [[META7:![0-9]+]]
+; CHECK-NEXT: ret ptr [[CALL]]
+;
+entry:
+ br i1 %b, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4)
+ br label %if.end
+
+if.else: ; preds = %entry
+ %call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !4, !callsite !7
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
+ ret ptr %x.0
+}
declare ptr @_Znwm(i64) nounwind readonly
@@ -43,9 +86,13 @@ declare ptr @_Znwm(i64) nounwind readonly
!5 = !{!6, !"cold"}
!6 = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434}
!7 = !{i64 123}
-;.
+
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"notcold"}
; CHECK: [[META2]] = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
; CHECK: [[META3]] = !{i64 -852997907418798798}
-;.
+; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
+; CHECK: [[META5]] = !{[[META6:![0-9]+]], !"cold"}
+; CHECK: [[META6]] = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434}
+; CHECK: [[META7]] = !{i64 123}
+
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
@@ -1,5 +1,3 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why did this get dropped?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Teresa mentioned that when this test was introduced in #121359 it was copied over and not actually created using update_test_checks.py. Since I didn't use the tool to generate the checks for the new tests I dropped it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is it not possible to use generated checks in this test?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's probably possible to do so after splitting out the additional test cases I added into their own files. Is it a requirement for SimplifyCFG tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Teresa mentioned that when this test was introduced in #121359 it was copied over and not actually created using update_test_checks.py. Since I didn't use the tool to generate the checks for the new tests I dropped it.
IIRC I copied the test from somewhere else so did run update_test_checks.py as in the original test. Does it not work to use that for the new tests?
For memprof and callsite metadata we want to pick one deterministically
and keep that even if one of them may be missing.