Skip to content

Commit 94d1453

Browse files
committed
[OpenMP][FIX] More AAExecutionDomain fixes
We missed certain updates, mostly to call site information, and dependent AAs did not get recomputed. We also did not properly distinguish and propagate incoming and outgoing information of call sites. The runtime tests passes now, I'll add a proper test for AAExecutionDomain soon that covers all the cases and ensures we haven't forgotten more updates. To help unblock some apps, I'll put the fix first.
1 parent 3a7cb3d commit 94d1453

File tree

4 files changed

+104
-79
lines changed

4 files changed

+104
-79
lines changed

llvm/include/llvm/Transforms/IPO/Attributor.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5090,7 +5090,10 @@ struct AAExecutionDomain
50905090
const Instruction &I) const = 0;
50915091

50925092
virtual ExecutionDomainTy getExecutionDomain(const BasicBlock &) const = 0;
5093-
virtual ExecutionDomainTy getExecutionDomain(const CallBase &) const = 0;
5093+
/// Return the execution domain with which the call \p CB is entered and the
5094+
/// one with which it is left.
5095+
virtual std::pair<ExecutionDomainTy, ExecutionDomainTy>
5096+
getExecutionDomain(const CallBase &CB) const = 0;
50945097
virtual ExecutionDomainTy getFunctionExecutionDomain() const = 0;
50955098

50965099
/// This function should return true if the type of the \p AA is

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 99 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -2557,13 +2557,19 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
25572557
}
25582558

25592559
const std::string getAsStr() const override {
2560-
unsigned TotalBlocks = 0, InitialThreadBlocks = 0;
2560+
unsigned TotalBlocks = 0, InitialThreadBlocks = 0, AlignedBlocks = 0;
25612561
for (auto &It : BEDMap) {
2562+
if (!It.getFirst())
2563+
continue;
25622564
TotalBlocks++;
25632565
InitialThreadBlocks += It.getSecond().IsExecutedByInitialThreadOnly;
2566+
AlignedBlocks += It.getSecond().IsReachedFromAlignedBarrierOnly &&
2567+
It.getSecond().IsReachingAlignedBarrierOnly;
25642568
}
25652569
return "[AAExecutionDomain] " + std::to_string(InitialThreadBlocks) + "/" +
2566-
std::to_string(TotalBlocks) + " executed by initial thread only";
2570+
std::to_string(AlignedBlocks) + " of " +
2571+
std::to_string(TotalBlocks) +
2572+
" executed by initial thread / aligned";
25672573
}
25682574

25692575
/// See AbstractAttribute::trackStatistics().
@@ -2586,7 +2592,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
25862592

25872593
SmallPtrSet<CallBase *, 16> DeletedBarriers;
25882594
auto HandleAlignedBarrier = [&](CallBase *CB) {
2589-
const ExecutionDomainTy &ED = CEDMap[CB];
2595+
const ExecutionDomainTy &ED = CEDMap[{CB, PRE}];
25902596
if (!ED.IsReachedFromAlignedBarrierOnly ||
25912597
ED.EncounteredNonLocalSideEffect)
25922598
return;
@@ -2619,7 +2625,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
26192625
// The final aligned barrier (LastCB) reaching the kernel end was
26202626
// removed already. This means we can go one step further and remove
26212627
// the barriers encoutered last before (LastCB).
2622-
const ExecutionDomainTy &LastED = CEDMap[LastCB];
2628+
const ExecutionDomainTy &LastED = CEDMap[{LastCB, PRE}];
26232629
Worklist.append(LastED.AlignedBarriers.begin(),
26242630
LastED.AlignedBarriers.end());
26252631
}
@@ -2652,12 +2658,12 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
26522658
/// Merge all information from \p PredED into the successor \p ED. If
26532659
/// \p InitialEdgeOnly is set, only the initial edge will enter the block
26542660
/// represented by \p ED from this predecessor.
2655-
void mergeInPredecessor(Attributor &A, ExecutionDomainTy &ED,
2661+
bool mergeInPredecessor(Attributor &A, ExecutionDomainTy &ED,
26562662
const ExecutionDomainTy &PredED,
26572663
bool InitialEdgeOnly = false);
26582664

26592665
/// Accumulate information for the entry block in \p EntryBBED.
2660-
void handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED);
2666+
bool handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED);
26612667

26622668
/// See AbstractAttribute::updateImpl.
26632669
ChangeStatus updateImpl(Attributor &A) override;
@@ -2667,6 +2673,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
26672673
bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
26682674
if (!isValidState())
26692675
return false;
2676+
assert(BB.getParent() == getAnchorScope() && "Block is out of scope!");
26702677
return BEDMap.lookup(&BB).IsExecutedByInitialThreadOnly;
26712678
}
26722679

@@ -2688,7 +2695,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
26882695
if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
26892696
break;
26902697
}
2691-
const auto &It = CEDMap.find(CB);
2698+
const auto &It = CEDMap.find({CB, PRE});
26922699
if (It == CEDMap.end())
26932700
continue;
26942701
if (!It->getSecond().IsReachingAlignedBarrierOnly)
@@ -2708,26 +2715,12 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
27082715
if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
27092716
break;
27102717
}
2711-
const auto &It = CEDMap.find(CB);
2718+
const auto &It = CEDMap.find({CB, POST});
27122719
if (It == CEDMap.end())
27132720
continue;
2714-
if (!AA::isNoSyncInst(A, *CB, *this)) {
2715-
if (It->getSecond().IsReachedFromAlignedBarrierOnly) {
2716-
break;
2717-
}
2718-
return false;
2719-
}
2720-
2721-
Function *Callee = CB->getCalledFunction();
2722-
if (!Callee || Callee->isDeclaration())
2723-
return false;
2724-
const auto &EDAA = A.getAAFor<AAExecutionDomain>(
2725-
*this, IRPosition::function(*Callee), DepClassTy::OPTIONAL);
2726-
if (!EDAA.getState().isValidState())
2727-
return false;
2728-
if (!EDAA.getFunctionExecutionDomain().IsReachedFromAlignedBarrierOnly)
2729-
return false;
2730-
break;
2721+
if (It->getSecond().IsReachedFromAlignedBarrierOnly)
2722+
break;
2723+
return false;
27312724
} while ((CurI = CurI->getPrevNonDebugInstruction()));
27322725

27332726
if (!CurI) {
@@ -2750,10 +2743,11 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
27502743
"No request should be made against an invalid state!");
27512744
return BEDMap.lookup(&BB);
27522745
}
2753-
ExecutionDomainTy getExecutionDomain(const CallBase &CB) const override {
2746+
std::pair<ExecutionDomainTy, ExecutionDomainTy>
2747+
getExecutionDomain(const CallBase &CB) const override {
27542748
assert(isValidState() &&
27552749
"No request should be made against an invalid state!");
2756-
return CEDMap.lookup(&CB);
2750+
return {CEDMap.lookup({&CB, PRE}), CEDMap.lookup({&CB, POST})};
27572751
}
27582752
ExecutionDomainTy getFunctionExecutionDomain() const override {
27592753
assert(isValidState() &&
@@ -2810,12 +2804,21 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
28102804
/// Mapping containing information about the function for other AAs.
28112805
ExecutionDomainTy InterProceduralED;
28122806

2807+
enum Direction { PRE = 0, POST = 1 };
28132808
/// Mapping containing information per block.
28142809
DenseMap<const BasicBlock *, ExecutionDomainTy> BEDMap;
2815-
DenseMap<const CallBase *, ExecutionDomainTy> CEDMap;
2810+
DenseMap<PointerIntPair<const CallBase *, 1, Direction>, ExecutionDomainTy>
2811+
CEDMap;
28162812
SmallSetVector<CallBase *, 16> AlignedBarriers;
28172813

28182814
ReversePostOrderTraversal<Function *> *RPOT = nullptr;
2815+
2816+
/// Set \p R to \V and report true if that changed \p R.
2817+
static bool setAndRecord(bool &R, bool V) {
2818+
bool Eq = (R == V);
2819+
R = V;
2820+
return !Eq;
2821+
}
28192822
};
28202823

28212824
void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions(
@@ -2827,26 +2830,33 @@ void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions(
28272830
ED.addAlignedBarrier(A, *AB);
28282831
}
28292832

2830-
void AAExecutionDomainFunction::mergeInPredecessor(
2833+
bool AAExecutionDomainFunction::mergeInPredecessor(
28312834
Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED,
28322835
bool InitialEdgeOnly) {
2833-
ED.IsExecutedByInitialThreadOnly =
2834-
InitialEdgeOnly || (PredED.IsExecutedByInitialThreadOnly &&
2835-
ED.IsExecutedByInitialThreadOnly);
2836-
2837-
ED.IsReachedFromAlignedBarrierOnly = ED.IsReachedFromAlignedBarrierOnly &&
2838-
PredED.IsReachedFromAlignedBarrierOnly;
2839-
ED.EncounteredNonLocalSideEffect =
2840-
ED.EncounteredNonLocalSideEffect | PredED.EncounteredNonLocalSideEffect;
2836+
2837+
bool Changed = false;
2838+
Changed |=
2839+
setAndRecord(ED.IsExecutedByInitialThreadOnly,
2840+
InitialEdgeOnly || (PredED.IsExecutedByInitialThreadOnly &&
2841+
ED.IsExecutedByInitialThreadOnly));
2842+
2843+
Changed |= setAndRecord(ED.IsReachedFromAlignedBarrierOnly,
2844+
ED.IsReachedFromAlignedBarrierOnly &&
2845+
PredED.IsReachedFromAlignedBarrierOnly);
2846+
Changed |= setAndRecord(ED.EncounteredNonLocalSideEffect,
2847+
ED.EncounteredNonLocalSideEffect |
2848+
PredED.EncounteredNonLocalSideEffect);
2849+
// Do not track assumptions and barriers as part of Changed.
28412850
if (ED.IsReachedFromAlignedBarrierOnly)
28422851
mergeInPredecessorBarriersAndAssumptions(A, ED, PredED);
28432852
else
28442853
ED.clearAssumeInstAndAlignedBarriers();
2854+
return Changed;
28452855
}
28462856

2847-
void AAExecutionDomainFunction::handleCallees(Attributor &A,
2857+
bool AAExecutionDomainFunction::handleCallees(Attributor &A,
28482858
ExecutionDomainTy &EntryBBED) {
2849-
SmallVector<ExecutionDomainTy> CallSiteEDs;
2859+
SmallVector<std::pair<ExecutionDomainTy, ExecutionDomainTy>, 4> CallSiteEDs;
28502860
auto PredForCallSite = [&](AbstractCallSite ACS) {
28512861
const auto &EDAA = A.getAAFor<AAExecutionDomain>(
28522862
*this, IRPosition::function(*ACS.getInstruction()->getFunction()),
@@ -2863,9 +2873,10 @@ void AAExecutionDomainFunction::handleCallees(Attributor &A,
28632873
if (A.checkForAllCallSites(PredForCallSite, *this,
28642874
/* RequiresAllCallSites */ true,
28652875
AllCallSitesKnown)) {
2866-
for (const auto &CSED : CallSiteEDs) {
2867-
mergeInPredecessor(A, EntryBBED, CSED);
2868-
ExitED.IsReachingAlignedBarrierOnly &= CSED.IsReachingAlignedBarrierOnly;
2876+
for (const auto &[CSInED, CSOutED] : CallSiteEDs) {
2877+
mergeInPredecessor(A, EntryBBED, CSInED);
2878+
ExitED.IsReachingAlignedBarrierOnly &=
2879+
CSOutED.IsReachingAlignedBarrierOnly;
28692880
}
28702881

28712882
} else {
@@ -2885,10 +2896,17 @@ void AAExecutionDomainFunction::handleCallees(Attributor &A,
28852896
}
28862897
}
28872898

2899+
bool Changed = false;
28882900
auto &FnED = BEDMap[nullptr];
2889-
FnED.IsReachedFromAlignedBarrierOnly &=
2890-
EntryBBED.IsReachedFromAlignedBarrierOnly;
2891-
FnED.IsReachingAlignedBarrierOnly &= ExitED.IsReachingAlignedBarrierOnly;
2901+
Changed |= setAndRecord(FnED.IsReachedFromAlignedBarrierOnly,
2902+
FnED.IsReachedFromAlignedBarrierOnly &
2903+
EntryBBED.IsReachedFromAlignedBarrierOnly);
2904+
Changed |= setAndRecord(FnED.IsReachingAlignedBarrierOnly,
2905+
FnED.IsReachingAlignedBarrierOnly &
2906+
ExitED.IsReachingAlignedBarrierOnly);
2907+
Changed |= setAndRecord(FnED.IsExecutedByInitialThreadOnly,
2908+
EntryBBED.IsExecutedByInitialThreadOnly);
2909+
return Changed;
28922910
}
28932911

28942912
ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
@@ -2902,27 +2920,23 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
29022920
if (CB)
29032921
Changed |= AlignedBarriers.insert(CB);
29042922
// First, update the barrier ED kept in the separate CEDMap.
2905-
auto &CallED = CEDMap[CB];
2906-
mergeInPredecessor(A, CallED, ED);
2923+
auto &CallInED = CEDMap[{CB, PRE}];
2924+
Changed |= mergeInPredecessor(A, CallInED, ED);
2925+
CallInED.IsReachingAlignedBarrierOnly = true;
29072926
// Next adjust the ED we use for the traversal.
29082927
ED.EncounteredNonLocalSideEffect = false;
29092928
ED.IsReachedFromAlignedBarrierOnly = true;
29102929
// Aligned barrier collection has to come last.
29112930
ED.clearAssumeInstAndAlignedBarriers();
29122931
if (CB)
29132932
ED.addAlignedBarrier(A, *CB);
2933+
auto &CallOutED = CEDMap[{CB, POST}];
2934+
Changed |= mergeInPredecessor(A, CallOutED, ED);
29142935
};
29152936

29162937
auto &LivenessAA =
29172938
A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL);
29182939

2919-
// Set \p R to \V and report true if that changed \p R.
2920-
auto SetAndRecord = [&](bool &R, bool V) {
2921-
bool Eq = (R == V);
2922-
R = V;
2923-
return !Eq;
2924-
};
2925-
29262940
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
29272941

29282942
Function *F = getAnchorScope();
@@ -2940,7 +2954,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
29402954
ExecutionDomainTy ED;
29412955
// Propagate "incoming edges" into information about this block.
29422956
if (IsEntryBB) {
2943-
handleCallees(A, ED);
2957+
Changed |= handleCallees(A, ED);
29442958
} else {
29452959
// For live non-entry blocks we only propagate
29462960
// information via live edges.
@@ -3009,8 +3023,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
30093023

30103024
// Record how we entered the call, then accumulate the effect of the
30113025
// call in ED for potential use by the callee.
3012-
auto &CallED = CEDMap[CB];
3013-
mergeInPredecessor(A, CallED, ED);
3026+
auto &CallInED = CEDMap[{CB, PRE}];
3027+
Changed |= mergeInPredecessor(A, CallInED, ED);
30143028

30153029
// If we have a sync-definition we can check if it starts/ends in an
30163030
// aligned barrier. If we are unsure we assume any sync breaks
@@ -3022,7 +3036,6 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
30223036
if (EDAA.getState().isValidState()) {
30233037
const auto &CalleeED = EDAA.getFunctionExecutionDomain();
30243038
ED.IsReachedFromAlignedBarrierOnly =
3025-
CallED.IsReachedFromAlignedBarrierOnly =
30263039
CalleeED.IsReachedFromAlignedBarrierOnly;
30273040
AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly;
30283041
if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly)
@@ -3031,20 +3044,27 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
30313044
else
30323045
ED.EncounteredNonLocalSideEffect =
30333046
CalleeED.EncounteredNonLocalSideEffect;
3034-
if (!CalleeED.IsReachingAlignedBarrierOnly)
3047+
if (!CalleeED.IsReachingAlignedBarrierOnly) {
3048+
Changed |=
3049+
setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false);
30353050
SyncInstWorklist.push_back(&I);
3051+
}
30363052
if (CalleeED.IsReachedFromAlignedBarrierOnly)
30373053
mergeInPredecessorBarriersAndAssumptions(A, ED, CalleeED);
3054+
auto &CallOutED = CEDMap[{CB, POST}];
3055+
Changed |= mergeInPredecessor(A, CallOutED, ED);
30383056
continue;
30393057
}
30403058
}
3041-
if (!IsNoSync)
3042-
ED.IsReachedFromAlignedBarrierOnly =
3043-
CallED.IsReachedFromAlignedBarrierOnly = false;
3059+
if (!IsNoSync) {
3060+
ED.IsReachedFromAlignedBarrierOnly = false;
3061+
Changed |= setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false);
3062+
SyncInstWorklist.push_back(&I);
3063+
}
30443064
AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly;
30453065
ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory();
3046-
if (!IsNoSync)
3047-
SyncInstWorklist.push_back(&I);
3066+
auto &CallOutED = CEDMap[{CB, POST}];
3067+
Changed |= mergeInPredecessor(A, CallOutED, ED);
30483068
}
30493069

30503070
if (!I.mayHaveSideEffects() && !I.mayReadFromMemory())
@@ -3083,12 +3103,14 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
30833103
if (!isa<UnreachableInst>(BB.getTerminator()) &&
30843104
!BB.getTerminator()->getNumSuccessors()) {
30853105

3086-
mergeInPredecessor(A, InterProceduralED, ED);
3106+
Changed |= mergeInPredecessor(A, InterProceduralED, ED);
30873107

30883108
auto &FnED = BEDMap[nullptr];
30893109
if (!FnED.IsReachingAlignedBarrierOnly) {
30903110
IsEndAndNotReachingAlignedBarriersOnly = true;
30913111
SyncInstWorklist.push_back(BB.getTerminator());
3112+
auto &BBED = BEDMap[&BB];
3113+
Changed |= setAndRecord(BBED.IsReachingAlignedBarrierOnly, false);
30923114
}
30933115
if (IsKernel)
30943116
HandleAlignedBarrier(nullptr, ED);
@@ -3120,34 +3142,37 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
31203142
while (!SyncInstWorklist.empty()) {
31213143
Instruction *SyncInst = SyncInstWorklist.pop_back_val();
31223144
Instruction *CurInst = SyncInst;
3123-
bool HitAlignedBarrier = false;
3145+
bool HitAlignedBarrierOrKnownEnd = false;
31243146
while ((CurInst = CurInst->getPrevNode())) {
31253147
auto *CB = dyn_cast<CallBase>(CurInst);
31263148
if (!CB)
31273149
continue;
3128-
auto &CallED = CEDMap[CB];
3129-
if (SetAndRecord(CallED.IsReachingAlignedBarrierOnly, false))
3150+
auto &CallOutED = CEDMap[{CB, POST}];
3151+
if (setAndRecord(CallOutED.IsReachingAlignedBarrierOnly, false))
31303152
Changed = true;
3131-
HitAlignedBarrier = AlignedBarriers.count(CB);
3132-
if (HitAlignedBarrier)
3153+
auto &CallInED = CEDMap[{CB, PRE}];
3154+
HitAlignedBarrierOrKnownEnd =
3155+
AlignedBarriers.count(CB) || !CallInED.IsReachingAlignedBarrierOnly;
3156+
if (HitAlignedBarrierOrKnownEnd)
31333157
break;
31343158
}
3135-
if (HitAlignedBarrier)
3159+
if (HitAlignedBarrierOrKnownEnd)
31363160
continue;
31373161
BasicBlock *SyncBB = SyncInst->getParent();
31383162
for (auto *PredBB : predecessors(SyncBB)) {
31393163
if (LivenessAA.isEdgeDead(PredBB, SyncBB))
31403164
continue;
31413165
if (!Visited.insert(PredBB))
31423166
continue;
3143-
SyncInstWorklist.push_back(PredBB->getTerminator());
31443167
auto &PredED = BEDMap[PredBB];
3145-
if (SetAndRecord(PredED.IsReachingAlignedBarrierOnly, false))
3168+
if (setAndRecord(PredED.IsReachingAlignedBarrierOnly, false)) {
31463169
Changed = true;
3170+
SyncInstWorklist.push_back(PredBB->getTerminator());
3171+
}
31473172
}
31483173
if (SyncBB != &EntryBB)
31493174
continue;
3150-
if (SetAndRecord(InterProceduralED.IsReachingAlignedBarrierOnly, false))
3175+
if (setAndRecord(InterProceduralED.IsReachingAlignedBarrierOnly, false))
31513176
Changed = true;
31523177
}
31533178

llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ define void @kernel() "kernel" {
3737
; CHECK: if.else:
3838
; CHECK-NEXT: call void @barrier() #[[ATTR6:[0-9]+]]
3939
; CHECK-NEXT: call void @use1(i32 undef) #[[ATTR6]]
40-
; CHECK-NEXT: call void @llvm.assume(i1 true)
40+
; CHECK-NEXT: call void @llvm.assume(i1 undef)
4141
; CHECK-NEXT: call void @barrier() #[[ATTR6]]
4242
; CHECK-NEXT: br label [[IF_MERGE]]
4343
; CHECK: if.merge:

0 commit comments

Comments
 (0)