Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -3950,10 +3950,26 @@ let Visibility = [ClangOption, CC1Option, FC1Option, FlangOption] in {
let Group = f_Group in {

def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">;
def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">;
def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">;
def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">;
def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">;
// Positive form of the teams-oversubscription assumption; its negative
// counterpart is fno_openmp_assume_teams_oversubscription.
def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">,
HelpText<"Allow the optimizer to discretely increase the number of "
"teams. May cause environment variables that set "
"the number of teams to be ignored. The combination of "
"-fopenmp-assume-teams-oversubscription "
"and -fopenmp-assume-threads-oversubscription "
"may allow the conversion of loops into sequential code by "
"ensuring that each team/thread executes at most one iteration.">;
// Positive form of the threads-oversubscription assumption; its negative
// counterpart is fno_openmp_assume_threads_oversubscription.
def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">,
HelpText<"Allow the optimizer to discretely increase the number of "
"threads. May cause environment variables that set "
"the number of threads to be ignored. The combination of "
"-fopenmp-assume-teams-oversubscription "
"and -fopenmp-assume-threads-oversubscription "
"may allow the conversion of loops into sequential code by "
"ensuring that each team/thread executes at most one iteration.">;
def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">,
HelpText<"Do not assume teams oversubscription.">;
def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">,
HelpText<"Do not assume threads oversubscription.">;
def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">,
HelpText<"Assert no thread in a parallel region modifies an ICV">,
MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
Expand Down
8 changes: 6 additions & 2 deletions llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1085,11 +1085,13 @@ class OpenMPIRBuilder {
/// preheader of the loop.
/// \param LoopType Information about type of loop worksharing.
/// It corresponds to type of loop workshare OpenMP pragma.
/// \param NoLoop If true, no-loop code is generated.
///
/// \returns Point where to insert code after the workshare construct.
InsertPointTy applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
InsertPointTy AllocaIP,
omp::WorksharingLoopType LoopType);
omp::WorksharingLoopType LoopType,
bool NoLoop);

/// Modifies the canonical loop to be a statically-scheduled workshare loop.
///
Expand Down Expand Up @@ -1209,6 +1211,7 @@ class OpenMPIRBuilder {
/// present.
/// \param LoopType Information about type of loop worksharing.
/// It corresponds to type of loop workshare OpenMP pragma.
/// \param NoLoop If true, no-loop code is generated.
///
/// \returns Point where to insert code after the workshare construct.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(
Expand All @@ -1219,7 +1222,8 @@ class OpenMPIRBuilder {
bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
bool HasOrderedClause = false,
omp::WorksharingLoopType LoopType =
omp::WorksharingLoopType::ForStaticLoop);
omp::WorksharingLoopType::ForStaticLoop,
bool NoLoop = false);

/// Tile a loop nest.
///
Expand Down
23 changes: 12 additions & 11 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4979,7 +4979,7 @@ static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder,
WorksharingLoopType LoopType,
BasicBlock *InsertBlock, Value *Ident,
Value *LoopBodyArg, Value *TripCount,
Function &LoopBodyFn) {
Function &LoopBodyFn, bool NoLoop) {
Type *TripCountTy = TripCount->getType();
Module &M = OMPBuilder->M;
IRBuilder<> &Builder = OMPBuilder->Builder;
Expand Down Expand Up @@ -5007,16 +5007,18 @@ static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder,
RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
RealArgs.push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
} else {
RealArgs.push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
}
RealArgs.push_back(ConstantInt::get(Builder.getInt8Ty(), 0));

Builder.CreateCall(RTLFn, RealArgs);
}

static void workshareLoopTargetCallback(
OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident,
Function &OutlinedFn, const SmallVector<Instruction *, 4> &ToBeDeleted,
WorksharingLoopType LoopType) {
WorksharingLoopType LoopType, bool NoLoop) {
IRBuilder<> &Builder = OMPIRBuilder->Builder;
BasicBlock *Preheader = CLI->getPreheader();
Value *TripCount = CLI->getTripCount();
Expand Down Expand Up @@ -5063,17 +5065,16 @@ static void workshareLoopTargetCallback(
OutlinedFnCallInstruction->eraseFromParent();

createTargetLoopWorkshareCall(OMPIRBuilder, LoopType, Preheader, Ident,
LoopBodyArg, TripCount, OutlinedFn);
LoopBodyArg, TripCount, OutlinedFn, NoLoop);

for (auto &ToBeDeletedItem : ToBeDeleted)
ToBeDeletedItem->eraseFromParent();
CLI->invalidate();
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
InsertPointTy AllocaIP,
WorksharingLoopType LoopType) {
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget(
DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
WorksharingLoopType LoopType, bool NoLoop) {
uint32_t SrcLocStrSize;
Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
Expand Down Expand Up @@ -5156,7 +5157,7 @@ OpenMPIRBuilder::applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
OI.PostOutlineCB = [=, ToBeDeletedVec =
std::move(ToBeDeleted)](Function &OutlinedFn) {
workshareLoopTargetCallback(this, CLI, Ident, OutlinedFn, ToBeDeletedVec,
LoopType);
LoopType, NoLoop);
};
addOutlineInfo(std::move(OI));
return CLI->getAfterIP();
Expand All @@ -5167,9 +5168,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
bool NeedsBarrier, omp::ScheduleKind SchedKind, Value *ChunkSize,
bool HasSimdModifier, bool HasMonotonicModifier,
bool HasNonmonotonicModifier, bool HasOrderedClause,
WorksharingLoopType LoopType) {
WorksharingLoopType LoopType, bool NoLoop) {
if (Config.isTargetDevice())
return applyWorkshareLoopTarget(DL, CLI, AllocaIP, LoopType);
return applyWorkshareLoopTarget(DL, CLI, AllocaIP, LoopType, NoLoop);
OMPScheduleType EffectiveScheduleType = computeOpenMPScheduleType(
SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
HasNonmonotonicModifier, HasOrderedClause);
Expand Down
159 changes: 159 additions & 0 deletions llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
Original file line number Diff line number Diff line change
Expand Up @@ -574,5 +574,164 @@ exit:
ret void
}

; %start is assumed 2-byte aligned and %end 4-byte aligned through "align"
; assume operand bundles. The loop advances a pointer IV by 4 bytes per
; iteration and exits once the incremented pointer equals %end.
; The expected output below lists the plain backedge-taken counts as
; unpredictable and a predicated count guarded by a predicate that the low two
; bits of (%end - %start) are zero.
define void @test_ptr_aligned_by_2_and_4_via_assumption(ptr %start, ptr %end) {
; CHECK-LABEL: 'test_ptr_aligned_by_2_and_4_via_assumption'
; CHECK-NEXT: Classifying expressions for: @test_ptr_aligned_by_2_and_4_via_assumption
; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4
; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_ptr_aligned_by_2_and_4_via_assumption
; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ]
call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 4) ]
br label %loop

loop:
%iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
store ptr %iv, ptr %iv
%iv.next = getelementptr i8, ptr %iv, i64 4
%ec = icmp ne ptr %iv.next, %end
br i1 %ec, label %loop, label %exit

exit:
ret void
}

; Both %start and %end are assumed 4-byte aligned through "align" assume
; operand bundles. The loop advances a pointer IV by 4 bytes per iteration and
; exits once the incremented pointer equals %end.
; The expected output below lists the plain backedge-taken counts as
; unpredictable and a predicated count guarded by a predicate that the low two
; bits of (%end - %start) are zero.
define void @test_ptrs_aligned_by_4_via_assumption(ptr %start, ptr %end) {
; CHECK-LABEL: 'test_ptrs_aligned_by_4_via_assumption'
; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_4_via_assumption
; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4
; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_4_via_assumption
; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 4) ]
call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 4) ]
br label %loop

loop:
%iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
store ptr %iv, ptr %iv
%iv.next = getelementptr i8, ptr %iv, i64 4
%ec = icmp ne ptr %iv.next, %end
br i1 %ec, label %loop, label %exit

exit:
ret void
}

; Both %start and %end are assumed 8-byte aligned through "align" assume
; operand bundles, while the loop advances a pointer IV by only 4 bytes per
; iteration and exits once the incremented pointer equals %end.
; The expected output below lists the plain backedge-taken counts as
; unpredictable and a predicated count guarded by a predicate that the low two
; bits of (%end - %start) are zero.
define void @test_ptrs_aligned_by_8_via_assumption(ptr %start, ptr %end) {
; CHECK-LABEL: 'test_ptrs_aligned_by_8_via_assumption'
; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_8_via_assumption
; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4
; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_8_via_assumption
; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
br label %loop

loop:
%iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
store ptr %iv, ptr %iv
%iv.next = getelementptr i8, ptr %iv, i64 4
%ec = icmp ne ptr %iv.next, %end
br i1 %ec, label %loop, label %exit

exit:
ret void
}

declare i1 @cond()

; The "align" assume bundles sit in %entry, but the loop is entered from two
; distinct predecessors (%then and %else, chosen by the result of @cond), so
; the loop header has no single predecessor block.
; The loop advances a pointer IV by 4 bytes per iteration and exits once the
; incremented pointer equals %end. The expected output below lists the plain
; backedge-taken counts as unpredictable and a predicated count guarded by a
; predicate that the low two bits of (%end - %start) are zero.
; NOTE(review): the function name says "aligned_by_4" but %start is assumed
; aligned by 2 (only %end is assumed aligned by 4) - confirm this is intended.
define void @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors(ptr %start, ptr %end) {
; CHECK-LABEL: 'test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors'
; CHECK-NEXT: Classifying expressions for: @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors
; CHECK-NEXT: %c = call i1 @cond()
; CHECK-NEXT: --> %c U: full-set S: full-set
; CHECK-NEXT: %iv = phi ptr [ %start, %then ], [ %start, %else ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4
; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors
; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ]
call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 4) ]
%c = call i1 @cond()
br i1 %c, label %then, label %else

then:
br label %loop

else:
br label %loop

loop:
%iv = phi ptr [ %start, %then] , [ %start, %else ], [ %iv.next, %loop ]
store ptr %iv, ptr %iv
%iv.next = getelementptr i8, ptr %iv, i64 4
%ec = icmp ne ptr %iv.next, %end
br i1 %ec, label %loop, label %exit

exit:
ret void
}

declare void @llvm.assume(i1)
declare void @llvm.experimental.guard(i1, ...)
14 changes: 12 additions & 2 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td
Original file line number Diff line number Diff line change
Expand Up @@ -230,14 +230,24 @@ def TargetRegionFlagsNone : I32BitEnumAttrCaseNone<"none">;
def TargetRegionFlagsGeneric : I32BitEnumAttrCaseBit<"generic", 0>;
def TargetRegionFlagsSpmd : I32BitEnumAttrCaseBit<"spmd", 1>;
def TargetRegionFlagsTripCount : I32BitEnumAttrCaseBit<"trip_count", 2>;
def TargetRegionFlagsNoLoop : I32BitEnumAttrCaseBit<"no_loop", 3>;

def TargetRegionFlags : OpenMP_BitEnumAttr<
"TargetRegionFlags",
"target region property flags", [
"These flags describe properties of the target kernel. "
"TargetRegionFlagsGeneric - denotes generic kernel. "
"TargetRegionFlagsSpmd - denotes SPMD kernel. "
"TargetRegionFlagsNoLoop - denotes kernel where "
"num_teams * num_threads >= loop_trip_count. It allows the conversion "
"of loops into sequential code by ensuring that each team/thread "
"executes at most one iteration. "
"TargetRegionFlagsTripCount - checks if the loop trip count should be "
"calculated.", [
TargetRegionFlagsNone,
TargetRegionFlagsGeneric,
TargetRegionFlagsSpmd,
TargetRegionFlagsTripCount
TargetRegionFlagsTripCount,
TargetRegionFlagsNoLoop
]>;

//===----------------------------------------------------------------------===//
Expand Down
Loading